From fcd24a3a1af85407c4b071e7f3325baff9e5e7b5 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 14 Sep 2025 00:11:12 -0400 Subject: [PATCH 1/3] feat: begin adding docker image uses patterns Signed-off-by: William Woodruff --- crates/zizmor/src/audit/artipacked.rs | 2 +- crates/zizmor/src/audit/cache_poisoning.rs | 3 +- crates/zizmor/src/audit/impostor_commit.rs | 2 +- .../src/audit/known_vulnerable_actions.rs | 4 +- crates/zizmor/src/audit/ref_confusion.rs | 2 +- crates/zizmor/src/audit/stale_action_refs.rs | 4 +- crates/zizmor/src/audit/template_injection.rs | 4 +- crates/zizmor/src/audit/unpinned_uses.rs | 2 +- crates/zizmor/src/config.rs | 2 +- crates/zizmor/src/models/coordinate.rs | 4 +- crates/zizmor/src/models/uses.rs | 439 +----------------- crates/zizmor/src/models/uses/docker.rs | 1 + crates/zizmor/src/models/uses/repository.rs | 438 +++++++++++++++++ 13 files changed, 460 insertions(+), 447 deletions(-) create mode 100644 crates/zizmor/src/models/uses/docker.rs create mode 100644 crates/zizmor/src/models/uses/repository.rs diff --git a/crates/zizmor/src/audit/artipacked.rs b/crates/zizmor/src/audit/artipacked.rs index fefe7b915..fad12055a 100644 --- a/crates/zizmor/src/audit/artipacked.rs +++ b/crates/zizmor/src/audit/artipacked.rs @@ -5,7 +5,7 @@ use itertools::Itertools as _; use super::{Audit, AuditLoadError, audit_meta}; use crate::{ finding::{Confidence, Finding, Fix, Persona, Severity, location::Routable as _}, - models::{StepBodyCommon, StepCommon, uses::RepositoryUsesExt as _}, + models::{StepBodyCommon, StepCommon, uses::repository::RepositoryUsesExt as _}, state::AuditState, utils::split_patterns, }; diff --git a/crates/zizmor/src/audit/cache_poisoning.rs b/crates/zizmor/src/audit/cache_poisoning.rs index 8b6de714e..762bf74c4 100644 --- a/crates/zizmor/src/audit/cache_poisoning.rs +++ b/crates/zizmor/src/audit/cache_poisoning.rs @@ -9,6 +9,7 @@ use crate::finding::location::{Locatable as _, Routable}; use crate::finding::{Confidence, Finding, Fix, FixDisposition, Severity}; use crate::models::StepCommon; use crate::models::coordinate::{ActionCoordinate, ControlExpr, ControlFieldType, Toggle, Usage}; +use crate::models::uses::repository::RepositoryUsesPattern; use crate::models::workflow::{JobExt as _, NormalJob, Step, Steps}; use crate::state::AuditState; @@ -335,7 +336,7 @@ impl CachePoisoning { fn create_configurable_action_fix<'doc>( &self, - _uses_pattern: &crate::models::uses::RepositoryUsesPattern, + _uses_pattern: &RepositoryUsesPattern, control: &ControlExpr, step: &Step<'doc>, ) -> Option> { diff --git a/crates/zizmor/src/audit/impostor_commit.rs b/crates/zizmor/src/audit/impostor_commit.rs index c4fc598da..4d3f49402 100644 --- a/crates/zizmor/src/audit/impostor_commit.rs +++ b/crates/zizmor/src/audit/impostor_commit.rs @@ -13,7 +13,7 @@ use crate::{ config::Config, finding::{Confidence, Finding, Severity, location::Locatable as _}, github_api::{self, ComparisonStatus}, - models::{StepCommon, uses::RepositoryUsesExt as _, workflow::Workflow}, + models::{StepCommon, uses::repository::RepositoryUsesExt as _, workflow::Workflow}, state::AuditState, }; diff --git a/crates/zizmor/src/audit/known_vulnerable_actions.rs b/crates/zizmor/src/audit/known_vulnerable_actions.rs index d512becfb..bed228971 100644 --- a/crates/zizmor/src/audit/known_vulnerable_actions.rs +++ b/crates/zizmor/src/audit/known_vulnerable_actions.rs @@ -13,7 +13,9 @@ use crate::{ config::Config, finding::{Confidence, Finding, Fix, Severity, location::Routable as _}, github_api, - models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, + models::{ + StepCommon, action::CompositeStep, uses::repository::RepositoryUsesExt as _, workflow::Step, + }, state::AuditState, }; use yamlpatch::{Op, Patch}; diff --git a/crates/zizmor/src/audit/ref_confusion.rs b/crates/zizmor/src/audit/ref_confusion.rs index e569a4ae9..1c293a3a5 100644 --- a/crates/zizmor/src/audit/ref_confusion.rs +++ b/crates/zizmor/src/audit/ref_confusion.rs @@ -16,7 +16,7 @@ use crate::models::{StepCommon, action::CompositeStep}; use crate::{ finding::{Confidence, Severity}, github_api, - models::uses::RepositoryUsesExt as _, + models::uses::repository::RepositoryUsesExt as _, state::AuditState, }; diff --git a/crates/zizmor/src/audit/stale_action_refs.rs b/crates/zizmor/src/audit/stale_action_refs.rs index 8d7bb3d4f..53303e797 100644 --- a/crates/zizmor/src/audit/stale_action_refs.rs +++ b/crates/zizmor/src/audit/stale_action_refs.rs @@ -9,7 +9,9 @@ use crate::{ config::Config, finding::{Confidence, Finding, Severity}, github_api, - models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, + models::{ + StepCommon, action::CompositeStep, uses::repository::RepositoryUsesExt as _, workflow::Step, + }, state::AuditState, }; diff --git a/crates/zizmor/src/audit/template_injection.rs b/crates/zizmor/src/audit/template_injection.rs index 8c79460ee..be9e41186 100644 --- a/crates/zizmor/src/audit/template_injection.rs +++ b/crates/zizmor/src/audit/template_injection.rs @@ -33,8 +33,8 @@ use crate::{ location::{Routable as _, SymbolicLocation}, }, models::{ - self, StepCommon, action::CompositeStep, inputs::Capability, uses::RepositoryUsesPattern, - workflow::Step, + self, StepCommon, action::CompositeStep, inputs::Capability, + uses::repository::RepositoryUsesPattern, workflow::Step, }, state::AuditState, utils::{self, DEFAULT_ENVIRONMENT_VARIABLES, ExtractedExpr, extract_fenced_expressions}, diff --git a/crates/zizmor/src/audit/unpinned_uses.rs b/crates/zizmor/src/audit/unpinned_uses.rs index d6a7b1c89..2133a64d0 100644 --- a/crates/zizmor/src/audit/unpinned_uses.rs +++ b/crates/zizmor/src/audit/unpinned_uses.rs @@ -3,7 +3,7 @@ use github_actions_models::common::Uses; use super::{Audit, AuditLoadError, AuditState, audit_meta}; use crate::config::{Config, UsesPolicy}; use crate::finding::{Confidence, Finding, Persona, Severity}; -use crate::models::uses::RepositoryUsesPattern; +use crate::models::uses::repository::RepositoryUsesPattern; use crate::models::{StepCommon, action::CompositeStep, uses::UsesExt as _, workflow::Step}; pub(crate) struct UnpinnedUses; diff --git a/crates/zizmor/src/config.rs b/crates/zizmor/src/config.rs index 206501ccb..6bbd7678b 100644 --- a/crates/zizmor/src/config.rs +++ b/crates/zizmor/src/config.rs @@ -13,7 +13,7 @@ use crate::{ audit::{AuditCore, forbidden_uses::ForbiddenUses, unpinned_uses::UnpinnedUses}, finding::Finding, github_api::Client, - models::uses::RepositoryUsesPattern, + models::uses::repository::RepositoryUsesPattern, registry::input::RepoSlug, tips, }; diff --git a/crates/zizmor/src/models/coordinate.rs b/crates/zizmor/src/models/coordinate.rs index f22270b95..6146c6fcb 100644 --- a/crates/zizmor/src/models/coordinate.rs +++ b/crates/zizmor/src/models/coordinate.rs @@ -20,7 +20,7 @@ use indexmap::IndexMap; use crate::utils::ExtractedExpr; -use super::{StepBodyCommon, StepCommon, uses::RepositoryUsesPattern}; +use super::{StepBodyCommon, StepCommon, uses::repository::RepositoryUsesPattern}; pub(crate) enum ActionCoordinate { Configurable { @@ -386,7 +386,7 @@ mod tests { use crate::{ models::{ coordinate::{ControlExpr, ControlFieldType, Toggle, Usage}, - uses::RepositoryUsesPattern, + uses::repository::RepositoryUsesPattern, workflow::{Job, Workflow}, }, registry::input::InputKey, diff --git a/crates/zizmor/src/models/uses.rs b/crates/zizmor/src/models/uses.rs index e70ec7222..1d0648aea 100644 --- a/crates/zizmor/src/models/uses.rs +++ b/crates/zizmor/src/models/uses.rs @@ -1,216 +1,11 @@ //! Extension traits for the `Uses` APIs. -use std::{str::FromStr, sync::LazyLock}; +use github_actions_models::common::Uses; -use github_actions_models::common::{RepositoryUses, Uses}; -use regex::Regex; -use serde::Deserialize; +use crate::models::uses::repository::RepositoryUsesExt as _; -/// Matches all variants of [`RepositoryUsesPattern`] except `*`. -/// -/// TODO: Replace this with a real parser; this is ridiculous. -static REPOSITORY_USES_PATTERN: LazyLock = LazyLock::new(|| { - Regex::new( - r#"(?xmi) # verbose, multi-line mode, case-insensitive - ^ # start of line - ([\w-]+) # (1) owner - / # / - ([\w\.-]+|\*) # (2) repo or * - (?: # non-capturing group for optional subpath - / # / - ( # (3) subpath - [[[:graph:]]&&[^@\*]]+ # any non-space, non-@, non-* characters - | # OR - \* # * - ) # end of (3) subpath - )? # end of non-capturing group for optional subpath - (?: # non-capturing group for optional git ref - @ # @ - ([[[:graph:]]&&[^\*]]+) # (4) git ref (any non-space, non-* characters) - )? # end of non-capturing group for optional git ref - $ # end of line - "#, - ) - .unwrap() -}); - -/// Represents a pattern for matching repository `uses` references. -/// These patterns are ordered by specificity; more specific patterns -/// should be listed first. -#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] -pub(crate) enum RepositoryUsesPattern { - /// Matches exactly `owner/repo/subpath@ref`. - ExactWithRef { - owner: String, - repo: String, - subpath: Option, - git_ref: String, - }, - /// Matches exactly `owner/repo/subpath`. - ExactPath { - owner: String, - repo: String, - subpath: String, - }, - /// Matches exactly `owner/repo`. - ExactRepo { owner: String, repo: String }, - /// Matches `owner/repo/*` (i.e. any subpath under the given repo, including - /// the empty subpath). - InRepo { owner: String, repo: String }, - /// Matches `owner/*` (i.e. any repo under the given owner). - InOwner(String), - /// Matches any `owner/repo`. - Any, -} - -impl RepositoryUsesPattern { - pub(crate) fn matches(&self, uses: &RepositoryUses) -> bool { - match self { - RepositoryUsesPattern::ExactWithRef { - owner, - repo, - subpath, - git_ref, - } => { - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath == *subpath - && uses.git_ref.as_str() == git_ref - } - RepositoryUsesPattern::ExactPath { - owner, - repo, - subpath, - } => { - // TODO: Normalize the subpath here. - // This is nontrivial, since we need to normalize - // both leading slashes *and* arbitrary ./.. components. - // Utf8Path gets us part of the way there, but is - // platform dependent (i.e. will do the wrong thing - // if the platform separator is not /). - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath.as_deref().is_some_and(|s| s == subpath) - } - RepositoryUsesPattern::ExactRepo { owner, repo } => { - uses.owner.eq_ignore_ascii_case(owner) - && uses.repo.eq_ignore_ascii_case(repo) - && uses.subpath.is_none() - } - RepositoryUsesPattern::InRepo { owner, repo } => { - uses.owner.eq_ignore_ascii_case(owner) && uses.repo.eq_ignore_ascii_case(repo) - } - RepositoryUsesPattern::InOwner(owner) => uses.owner.eq_ignore_ascii_case(owner), - RepositoryUsesPattern::Any => true, - } - } -} - -impl FromStr for RepositoryUsesPattern { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - if s == "*" { - return Ok(RepositoryUsesPattern::Any); - } - - let caps = REPOSITORY_USES_PATTERN - .captures(s) - .ok_or_else(|| anyhow::anyhow!("invalid pattern: {s}"))?; - - let owner = &caps[1]; - let repo = &caps[2]; - let subpath = caps.get(3).map(|m| m.as_str()); - let git_ref = caps.get(4).map(|m| m.as_str()); - - match (owner, repo, subpath, git_ref) { - (owner, "*", None, None) => Ok(RepositoryUsesPattern::InOwner(owner.into())), - (owner, repo, None, None) => Ok(RepositoryUsesPattern::ExactRepo { - owner: owner.into(), - repo: repo.into(), - }), - (_, "*", Some(_), _) => Err(anyhow::anyhow!("invalid pattern: {s}")), - (owner, repo, Some("*"), None) => Ok(RepositoryUsesPattern::InRepo { - owner: owner.into(), - repo: repo.into(), - }), - (owner, repo, Some(subpath), None) => Ok(RepositoryUsesPattern::ExactPath { - owner: owner.into(), - repo: repo.into(), - subpath: subpath.into(), - }), - (_, _, Some("*"), Some(_)) => Err(anyhow::anyhow!("invalid pattern: {s}")), - (owner, repo, subpath, Some(git_ref)) => Ok(RepositoryUsesPattern::ExactWithRef { - owner: owner.into(), - repo: repo.into(), - subpath: subpath.map(|s| s.into()), - git_ref: git_ref.into(), - }), - } - } -} - -impl<'de> Deserialize<'de> for RepositoryUsesPattern { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - RepositoryUsesPattern::from_str(&s).map_err(serde::de::Error::custom) - } -} - -/// Useful APIs for interacting with `uses: owner/repo` clauses. -pub(crate) trait RepositoryUsesExt { - /// Returns whether this `uses:` clause matches the given pattern. - /// - /// This uses [`RepositoryUsesPattern`] under the hood, and follows the - /// same matching rules. - fn matches(&self, pattern: &str) -> bool; - - /// Returns whether this `uses:` clause has a `git` ref and, if so, - /// whether that ref is a commit ref. - /// - /// For example, `foo/bar@baz` returns false while `foo/bar@1234...` - /// returns true. - fn ref_is_commit(&self) -> bool; - - /// Returns the `git` ref for this `uses:`, if present. - fn commit_ref(&self) -> Option<&str>; - - /// Returns the *symbolic* `git` ref for this `uses`, if present. - /// - /// Commit refs (i.e. SHA refs) are not returned. - fn symbolic_ref(&self) -> Option<&str>; -} - -impl RepositoryUsesExt for RepositoryUses { - fn matches(&self, template: &str) -> bool { - let Ok(pat) = template.parse::() else { - return false; - }; - - pat.matches(self) - } - - fn ref_is_commit(&self) -> bool { - self.git_ref.len() == 40 && self.git_ref.chars().all(|c| c.is_ascii_hexdigit()) - } - - fn commit_ref(&self) -> Option<&str> { - match &self.git_ref { - git_ref if self.ref_is_commit() => Some(git_ref), - _ => None, - } - } - - fn symbolic_ref(&self) -> Option<&str> { - match &self.git_ref { - git_ref if !self.ref_is_commit() => Some(git_ref), - _ => None, - } - } -} +pub(crate) mod docker; +pub(crate) mod repository; /// Useful APIs for interacting with all kinds of `uses:` clauses. pub(crate) trait UsesExt { @@ -243,229 +38,3 @@ impl UsesExt for Uses { } } } - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use anyhow::anyhow; - use github_actions_models::common::Uses; - - use super::RepositoryUsesPattern; - - #[test] - fn test_repositoryusespattern_parse() { - for (pattern, expected) in [ - ("", None), // Invalid, empty - ("/", None), // Invalid, not well formed - ("//", None), // Invalid, not well formed - ("///", None), // Invalid, not well formed - ("owner", None), // Invalid, sho,uld be owner/* - ("**", None), // Invalid, should be * - ("*", Some(RepositoryUsesPattern::Any)), - ( - "owner/*", - Some(RepositoryUsesPattern::InOwner("owner".into())), - ), - ("owner/*/", None), // Invalid, should be owner/* - ("owner/*/foo", None), // Invalid, not well formed - ("owner/*/*", None), // Invalid, not well formed - ("*/foo", None), // Invalid, not well formed - ("owner/repo/**", None), // Invalid, not well formed. - ( - "owner/repo/*", - Some(RepositoryUsesPattern::InRepo { - owner: "owner".into(), - repo: "repo".into(), - }), - ), - ( - "owner/repo", - Some(RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into(), - }), - ), - ( - "owner/repo/subpath", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath".into(), - }), - ), - // We don't do any subpath normalization at construction time. - ( - "owner/repo//", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "/".into(), - }), - ), - ( - "owner/repo/subpath/", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath/".into(), - }), - ), - ( - "owner/repo/subpath/very/nested////and/literal", - Some(RepositoryUsesPattern::ExactPath { - owner: "owner".into(), - repo: "repo".into(), - subpath: "subpath/very/nested////and/literal".into(), - }), - ), - ( - "owner/repo@v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: None, - git_ref: "v1".into(), - }), - ), - ( - "owner/repo/subpath@v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: Some("subpath".into()), - git_ref: "v1".into(), - }), - ), - ( - "owner/repo@172239021f7ba04fe7327647b213799853a9eb89", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "owner".into(), - repo: "repo".into(), - subpath: None, - git_ref: "172239021f7ba04fe7327647b213799853a9eb89".into(), - }), - ), - ( - "pypa/gh-action-pypi-publish@release/v1", - Some(RepositoryUsesPattern::ExactWithRef { - owner: "pypa".into(), - repo: "gh-action-pypi-publish".into(), - subpath: None, - git_ref: "release/v1".into(), - }), - ), - // Invalid: no wildcards allowed when refs are present. - ("owner/repo/*@v1", None), - ("owner/repo/*/subpath@v1", None), - ("owner/*/subpath@v1", None), - ("*/*/subpath@v1", None), - // Ref also cannot be a wildcard. - ("owner/repo@*", None), - ("owner/repo@**", None), - ("owner/repo@***", None), - ("owner/repo/subpath@*", None), - ("owner/*@*", None), - ("*@*", None), - ] { - let pattern = RepositoryUsesPattern::from_str(pattern).ok(); - assert_eq!(pattern, expected); - } - } - - #[test] - fn test_repositoryusespattern_ord() { - let mut patterns = vec![ - RepositoryUsesPattern::Any, - RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into(), - }, - RepositoryUsesPattern::InOwner("owner".into()), - ]; - - patterns.sort(); - - assert_eq!( - patterns, - vec![ - RepositoryUsesPattern::ExactRepo { - owner: "owner".into(), - repo: "repo".into() - }, - RepositoryUsesPattern::InOwner("owner".into()), - RepositoryUsesPattern::Any, - ] - ); - } - - #[test] - fn test_repositoryusespattern_matches() -> anyhow::Result<()> { - for (uses, pattern, matches) in [ - // OK: case-insensitive, except subpath and tag - ("actions/checkout@v3", "Actions/Checkout@v3", true), - ("actions/checkout/foo@v3", "Actions/Checkout/foo", true), - ("actions/checkout@v3", "actions/checkout@V3", false), - // NOT OK: owner/repo do not match - ("actions/checkout@v3", "foo/checkout", false), - ("actions/checkout@v3", "actions/bar", false), - // NOT OK: subpath does not match - ("actions/checkout/foo@v3", "actions/checkout@v3", false), - // NOT OK: template is more specific than `uses:` - ("actions/checkout@v3", "actions/checkout/foo@v3", false), - // owner/repo/subpath matches regardless of ref and casing - // but only when the subpath matches. - // the subpath must share the same case but might not be - // normalized - ("actions/checkout/foo@v3", "actions/checkout/foo", true), - ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo", true), - // TODO: See comment in `RepositoryUsesPattern::matches` - // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo/", true), - // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo//", true), - // ("ACTIONS/CHECKOUT//foo////@v3", "actions/checkout/foo", true), - // owner/repo matches regardless of ref and casing - // but does not match subpaths - ("ACTIONS/CHECKOUT@v3", "actions/checkout", true), - ("actions/checkout@v3", "actions/checkout", true), - ("actions/checkout/foo@v3", "actions/checkout", false), - ("actions/somethingelse@v3", "actions/checkout", false), - ("whatever/checkout@v3", "actions/checkout", false), - // owner/repo/* matches regardless of ref and casing - // including subpaths - // but does not match when owner diverges - ("ACTIONS/CHECKOUT@v3", "actions/checkout/*", true), - ("actions/checkout@v3", "actions/checkout/*", true), - ("actions/checkout/foo@v3", "actions/checkout/*", true), - ("actions/checkout/foo/bar@v3", "actions/checkout/*", true), - ("someoneelse/checkout@v3", "actions/checkout/*", false), - // owner/* matches regardless of ref, casing, and subpath - // but rejects when owner diverges - ("ACTIONS/CHECKOUT@v3", "actions/*", true), - ("actions/checkout@v3", "actions/*", true), - ("actions/checkout/foo@v3", "actions/*", true), - ("someoneelse/checkout@v3", "actions/*", false), - // * matches everything - ("actions/checkout@v3", "*", true), - ("actions/checkout/foo@v3", "*", true), - ("whatever/checkout@v3", "*", true), - // exact matches - ("actions/checkout@v3", "actions/checkout@v3", true), - ("actions/checkout/foo@v3", "actions/checkout/foo@v3", true), - ("actions/checkout/foo@v1", "actions/checkout/foo@v3", false), - ] { - let Ok(Uses::Repository(uses)) = Uses::from_str(uses) else { - return Err(anyhow!("invalid uses: {uses}")); - }; - - let pattern = RepositoryUsesPattern::from_str(pattern)?; - - assert_eq!( - pattern.matches(&uses), - matches, - "pattern: {pattern:?}, uses: {uses:?}, matches: {matches}" - ); - } - - Ok(()) - } -} diff --git a/crates/zizmor/src/models/uses/docker.rs b/crates/zizmor/src/models/uses/docker.rs new file mode 100644 index 000000000..a4a93b802 --- /dev/null +++ b/crates/zizmor/src/models/uses/docker.rs @@ -0,0 +1 @@ +//! Patterns for Docker image `uses:` and corresponding extension traits. diff --git a/crates/zizmor/src/models/uses/repository.rs b/crates/zizmor/src/models/uses/repository.rs new file mode 100644 index 000000000..1da6292ad --- /dev/null +++ b/crates/zizmor/src/models/uses/repository.rs @@ -0,0 +1,438 @@ +//! Patterns for repository `uses:` and corresponding extension traits. + +use github_actions_models::common::RepositoryUses; +use regex::Regex; +use serde::Deserialize; +use std::{str::FromStr, sync::LazyLock}; + +/// Matches all variants of [`RepositoryUsesPattern`] except `*`. +/// +/// TODO: Replace this with a real parser; this is ridiculous. +static REPOSITORY_USES_PATTERN: LazyLock = LazyLock::new(|| { + Regex::new( + r#"(?xmi) # verbose, multi-line mode, case-insensitive + ^ # start of line + ([\w-]+) # (1) owner + / # / + ([\w\.-]+|\*) # (2) repo or * + (?: # non-capturing group for optional subpath + / # / + ( # (3) subpath + [[[:graph:]]&&[^@\*]]+ # any non-space, non-@, non-* characters + | # OR + \* # * + ) # end of (3) subpath + )? # end of non-capturing group for optional subpath + (?: # non-capturing group for optional git ref + @ # @ + ([[[:graph:]]&&[^\*]]+) # (4) git ref (any non-space, non-* characters) + )? # end of non-capturing group for optional git ref + $ # end of line + "#, + ) + .unwrap() +}); + +/// Represents a pattern for matching repository `uses` references. +/// These patterns are ordered by specificity; more specific patterns +/// should be listed first. +#[derive(Clone, Debug, Eq, PartialEq, Hash, PartialOrd, Ord)] +pub(crate) enum RepositoryUsesPattern { + /// Matches exactly `owner/repo/subpath@ref`. + ExactWithRef { + owner: String, + repo: String, + subpath: Option, + git_ref: String, + }, + /// Matches exactly `owner/repo/subpath`. + ExactPath { + owner: String, + repo: String, + subpath: String, + }, + /// Matches exactly `owner/repo`. + ExactRepo { owner: String, repo: String }, + /// Matches `owner/repo/*` (i.e. any subpath under the given repo, including + /// the empty subpath). + InRepo { owner: String, repo: String }, + /// Matches `owner/*` (i.e. any repo under the given owner). + InOwner(String), + /// Matches any `owner/repo`. + Any, +} + +impl RepositoryUsesPattern { + pub(crate) fn matches(&self, uses: &RepositoryUses) -> bool { + match self { + RepositoryUsesPattern::ExactWithRef { + owner, + repo, + subpath, + git_ref, + } => { + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath == *subpath + && uses.git_ref.as_str() == git_ref + } + RepositoryUsesPattern::ExactPath { + owner, + repo, + subpath, + } => { + // TODO: Normalize the subpath here. + // This is nontrivial, since we need to normalize + // both leading slashes *and* arbitrary ./.. components. + // Utf8Path gets us part of the way there, but is + // platform dependent (i.e. will do the wrong thing + // if the platform separator is not /). + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath.as_deref().is_some_and(|s| s == subpath) + } + RepositoryUsesPattern::ExactRepo { owner, repo } => { + uses.owner.eq_ignore_ascii_case(owner) + && uses.repo.eq_ignore_ascii_case(repo) + && uses.subpath.is_none() + } + RepositoryUsesPattern::InRepo { owner, repo } => { + uses.owner.eq_ignore_ascii_case(owner) && uses.repo.eq_ignore_ascii_case(repo) + } + RepositoryUsesPattern::InOwner(owner) => uses.owner.eq_ignore_ascii_case(owner), + RepositoryUsesPattern::Any => true, + } + } +} + +impl FromStr for RepositoryUsesPattern { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if s == "*" { + return Ok(RepositoryUsesPattern::Any); + } + + let caps = REPOSITORY_USES_PATTERN + .captures(s) + .ok_or_else(|| anyhow::anyhow!("invalid pattern: {s}"))?; + + let owner = &caps[1]; + let repo = &caps[2]; + let subpath = caps.get(3).map(|m| m.as_str()); + let git_ref = caps.get(4).map(|m| m.as_str()); + + match (owner, repo, subpath, git_ref) { + (owner, "*", None, None) => Ok(RepositoryUsesPattern::InOwner(owner.into())), + (owner, repo, None, None) => Ok(RepositoryUsesPattern::ExactRepo { + owner: owner.into(), + repo: repo.into(), + }), + (_, "*", Some(_), _) => Err(anyhow::anyhow!("invalid pattern: {s}")), + (owner, repo, Some("*"), None) => Ok(RepositoryUsesPattern::InRepo { + owner: owner.into(), + repo: repo.into(), + }), + (owner, repo, Some(subpath), None) => Ok(RepositoryUsesPattern::ExactPath { + owner: owner.into(), + repo: repo.into(), + subpath: subpath.into(), + }), + (_, _, Some("*"), Some(_)) => Err(anyhow::anyhow!("invalid pattern: {s}")), + (owner, repo, subpath, Some(git_ref)) => Ok(RepositoryUsesPattern::ExactWithRef { + owner: owner.into(), + repo: repo.into(), + subpath: subpath.map(|s| s.into()), + git_ref: git_ref.into(), + }), + } + } +} + +impl<'de> Deserialize<'de> for RepositoryUsesPattern { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + RepositoryUsesPattern::from_str(&s).map_err(serde::de::Error::custom) + } +} + +/// Useful APIs for interacting with `uses: owner/repo` clauses. +pub(crate) trait RepositoryUsesExt { + /// Returns whether this `uses:` clause matches the given pattern. + /// + /// This uses [`RepositoryUsesPattern`] under the hood, and follows the + /// same matching rules. + fn matches(&self, pattern: &str) -> bool; + + /// Returns whether this `uses:` clause has a `git` ref and, if so, + /// whether that ref is a commit ref. + /// + /// For example, `foo/bar@baz` returns false while `foo/bar@1234...` + /// returns true. + fn ref_is_commit(&self) -> bool; + + /// Returns the `git` ref for this `uses:`, if present. + fn commit_ref(&self) -> Option<&str>; + + /// Returns the *symbolic* `git` ref for this `uses`, if present. + /// + /// Commit refs (i.e. SHA refs) are not returned. + fn symbolic_ref(&self) -> Option<&str>; +} + +impl RepositoryUsesExt for RepositoryUses { + fn matches(&self, template: &str) -> bool { + let Ok(pat) = template.parse::() else { + return false; + }; + + pat.matches(self) + } + + fn ref_is_commit(&self) -> bool { + self.git_ref.len() == 40 && self.git_ref.chars().all(|c| c.is_ascii_hexdigit()) + } + + fn commit_ref(&self) -> Option<&str> { + match &self.git_ref { + git_ref if self.ref_is_commit() => Some(git_ref), + _ => None, + } + } + + fn symbolic_ref(&self) -> Option<&str> { + match &self.git_ref { + git_ref if !self.ref_is_commit() => Some(git_ref), + _ => None, + } + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use anyhow::anyhow; + use github_actions_models::common::Uses; + + use super::RepositoryUsesPattern; + + #[test] + fn test_repositoryusespattern_parse() { + for (pattern, expected) in [ + ("", None), // Invalid, empty + ("/", None), // Invalid, not well formed + ("//", None), // Invalid, not well formed + ("///", None), // Invalid, not well formed + ("owner", None), // Invalid, sho,uld be owner/* + ("**", None), // Invalid, should be * + ("*", Some(RepositoryUsesPattern::Any)), + ( + "owner/*", + Some(RepositoryUsesPattern::InOwner("owner".into())), + ), + ("owner/*/", None), // Invalid, should be owner/* + ("owner/*/foo", None), // Invalid, not well formed + ("owner/*/*", None), // Invalid, not well formed + ("*/foo", None), // Invalid, not well formed + ("owner/repo/**", None), // Invalid, not well formed. + ( + "owner/repo/*", + Some(RepositoryUsesPattern::InRepo { + owner: "owner".into(), + repo: "repo".into(), + }), + ), + ( + "owner/repo", + Some(RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into(), + }), + ), + ( + "owner/repo/subpath", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath".into(), + }), + ), + // We don't do any subpath normalization at construction time. + ( + "owner/repo//", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "/".into(), + }), + ), + ( + "owner/repo/subpath/", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath/".into(), + }), + ), + ( + "owner/repo/subpath/very/nested////and/literal", + Some(RepositoryUsesPattern::ExactPath { + owner: "owner".into(), + repo: "repo".into(), + subpath: "subpath/very/nested////and/literal".into(), + }), + ), + ( + "owner/repo@v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: None, + git_ref: "v1".into(), + }), + ), + ( + "owner/repo/subpath@v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: Some("subpath".into()), + git_ref: "v1".into(), + }), + ), + ( + "owner/repo@172239021f7ba04fe7327647b213799853a9eb89", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "owner".into(), + repo: "repo".into(), + subpath: None, + git_ref: "172239021f7ba04fe7327647b213799853a9eb89".into(), + }), + ), + ( + "pypa/gh-action-pypi-publish@release/v1", + Some(RepositoryUsesPattern::ExactWithRef { + owner: "pypa".into(), + repo: "gh-action-pypi-publish".into(), + subpath: None, + git_ref: "release/v1".into(), + }), + ), + // Invalid: no wildcards allowed when refs are present. + ("owner/repo/*@v1", None), + ("owner/repo/*/subpath@v1", None), + ("owner/*/subpath@v1", None), + ("*/*/subpath@v1", None), + // Ref also cannot be a wildcard. + ("owner/repo@*", None), + ("owner/repo@**", None), + ("owner/repo@***", None), + ("owner/repo/subpath@*", None), + ("owner/*@*", None), + ("*@*", None), + ] { + let pattern = RepositoryUsesPattern::from_str(pattern).ok(); + assert_eq!(pattern, expected); + } + } + + #[test] + fn test_repositoryusespattern_ord() { + let mut patterns = vec![ + RepositoryUsesPattern::Any, + RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into(), + }, + RepositoryUsesPattern::InOwner("owner".into()), + ]; + + patterns.sort(); + + assert_eq!( + patterns, + vec![ + RepositoryUsesPattern::ExactRepo { + owner: "owner".into(), + repo: "repo".into() + }, + RepositoryUsesPattern::InOwner("owner".into()), + RepositoryUsesPattern::Any, + ] + ); + } + + #[test] + fn test_repositoryusespattern_matches() -> anyhow::Result<()> { + for (uses, pattern, matches) in [ + // OK: case-insensitive, except subpath and tag + ("actions/checkout@v3", "Actions/Checkout@v3", true), + ("actions/checkout/foo@v3", "Actions/Checkout/foo", true), + ("actions/checkout@v3", "actions/checkout@V3", false), + // NOT OK: owner/repo do not match + ("actions/checkout@v3", "foo/checkout", false), + ("actions/checkout@v3", "actions/bar", false), + // NOT OK: subpath does not match + ("actions/checkout/foo@v3", "actions/checkout@v3", false), + // NOT OK: template is more specific than `uses:` + ("actions/checkout@v3", "actions/checkout/foo@v3", false), + // owner/repo/subpath matches regardless of ref and casing + // but only when the subpath matches. + // the subpath must share the same case but might not be + // normalized + ("actions/checkout/foo@v3", "actions/checkout/foo", true), + ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo", true), + // TODO: See comment in `RepositoryUsesPattern::matches` + // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo/", true), + // ("ACTIONS/CHECKOUT/foo@v3", "actions/checkout/foo//", true), + // ("ACTIONS/CHECKOUT//foo////@v3", "actions/checkout/foo", true), + // owner/repo matches regardless of ref and casing + // but does not match subpaths + ("ACTIONS/CHECKOUT@v3", "actions/checkout", true), + ("actions/checkout@v3", "actions/checkout", true), + ("actions/checkout/foo@v3", "actions/checkout", false), + ("actions/somethingelse@v3", "actions/checkout", false), + ("whatever/checkout@v3", "actions/checkout", false), + // owner/repo/* matches regardless of ref and casing + // including subpaths + // but does not match when owner diverges + ("ACTIONS/CHECKOUT@v3", "actions/checkout/*", true), + ("actions/checkout@v3", "actions/checkout/*", true), + ("actions/checkout/foo@v3", "actions/checkout/*", true), + ("actions/checkout/foo/bar@v3", "actions/checkout/*", true), + ("someoneelse/checkout@v3", "actions/checkout/*", false), + // owner/* matches regardless of ref, casing, and subpath + // but rejects when owner diverges + ("ACTIONS/CHECKOUT@v3", "actions/*", true), + ("actions/checkout@v3", "actions/*", true), + ("actions/checkout/foo@v3", "actions/*", true), + ("someoneelse/checkout@v3", "actions/*", false), + // * matches everything + ("actions/checkout@v3", "*", true), + ("actions/checkout/foo@v3", "*", true), + ("whatever/checkout@v3", "*", true), + // exact matches + ("actions/checkout@v3", "actions/checkout@v3", true), + ("actions/checkout/foo@v3", "actions/checkout/foo@v3", true), + ("actions/checkout/foo@v1", "actions/checkout/foo@v3", false), + ] { + let Ok(Uses::Repository(uses)) = Uses::from_str(uses) else { + return Err(anyhow!("invalid uses: {uses}")); + }; + + let pattern = RepositoryUsesPattern::from_str(pattern)?; + + assert_eq!( + pattern.matches(&uses), + matches, + "pattern: {pattern:?}, uses: {uses:?}, matches: {matches}" + ); + } + + Ok(()) + } +} From 99d1025690b5a5e2b086a9b31b5510d9c79d476c Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 14 Sep 2025 00:29:17 -0400 Subject: [PATCH 2/3] sketching Signed-off-by: William Woodruff --- crates/zizmor/src/models/uses/docker.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/zizmor/src/models/uses/docker.rs b/crates/zizmor/src/models/uses/docker.rs index a4a93b802..9eea6495d 100644 --- a/crates/zizmor/src/models/uses/docker.rs +++ b/crates/zizmor/src/models/uses/docker.rs @@ -1 +1,11 @@ -//! Patterns for Docker image `uses:` and corresponding extension traits. +//! Patterns for Docker images (including in `uses:` clauses) and corresponding extension traits. + +pub(crate) enum DockerImagePattern { + /// Matches `registry/repo/*` or `repo/*` (in the default registry case), + /// i.e. any image in the given repository. + InRepo { registry: String, repo: String }, + /// Matches `registry/*`, i.e. any image in the given registry. + InRegistry(String), + /// Matches any image. + Any, +} From 68dead207fbffa317afd3021bdd0b22be2a77e4d Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 14 Sep 2025 10:34:12 -0400 Subject: [PATCH 3/3] sketching Signed-off-by: William Woodruff --- crates/zizmor/src/models/uses/docker.rs | 40 +++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/crates/zizmor/src/models/uses/docker.rs b/crates/zizmor/src/models/uses/docker.rs index 9eea6495d..881a42288 100644 --- a/crates/zizmor/src/models/uses/docker.rs +++ b/crates/zizmor/src/models/uses/docker.rs @@ -1,9 +1,43 @@ //! Patterns for Docker images (including in `uses:` clauses) and corresponding extension traits. +use std::sync::LazyLock; + +use regex::Regex; + +// static DOCKER_IMAGE_PATTERN: LazyLock = LazyLock::new(|| { +// Regex::new( +// r#"(?xmi) # verbose, multi-line mode, case-insensitive +// ^ # start of line +// (?: # start of optional non-capturing group for [registry/] +// (? # start of capturing group for [registry] +// localhost|\w+\.\w+|\w+:\d+ # match localhost, domain-like, or domain:port +// ) # end of capturing group for [registry] +// / # / +// )? # end of optional non-capturing group for [registry/] +// (?: + +// )? +// "#, +// ) +// .unwrap() +// }); + +/// Represents a pattern for matching Docker images. +/// +/// These patterns are used for both `uses:` clauses and for other +/// audits that match image references, e.g. `unpinned-images`. pub(crate) enum DockerImagePattern { - /// Matches `registry/repo/*` or `repo/*` (in the default registry case), - /// i.e. any image in the given repository. - InRepo { registry: String, repo: String }, + /// Matches `[registry/]namespace/image`, i.e. a specific image. + ExactImage { + registry: Option, + namespace: String, + image: String, + }, + /// Matches `[registry/]namespace/*`, i.e. any image in the given namespace. + InNamespace { + registry: Option, + namespace: String, + }, /// Matches `registry/*`, i.e. any image in the given registry. InRegistry(String), /// Matches any image.