Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions .github/workflows/governance-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -244,20 +244,42 @@ jobs:

# Baseline lookup: returns 0 (exempt) if the file appears in
# .hypatia-baseline.json with a matching rule_module + type.
# Honours both `file` (exact) and `file_pattern` (glob) entries.
# The glob → regex translation mirrors apply-baseline.sh exactly:
# `**` matches any depth (incl. `/`); `*` matches one segment.
# Pattern support unblocks language-demo repos (absolute-zero
# carries ~30 banned-language example files under `examples/`)
# and any repo that vendors such subtrees, replacing per-file
# `.hypatia-ignore` enumeration with one `file_pattern` entry.
#
# Globals:   rule_module, rule_type (read) — the rule being checked.
# Arguments: $1 - path of the file the finding refers to.
# Returns:   0 when a baseline entry matches; non-zero otherwise,
#            including when the baseline file or jq is missing or
#            jq fails (jq's stderr is discarded).
#
# NOTE: only `*` is translated; every other character of a
# `file_pattern` reaches the regex engine untouched (so `.` matches
# any character). Kept as-is to stay in step with apply-baseline.sh.
in_baseline() {
  local target="$1"
  [ -f .hypatia-baseline.json ] || return 1
  command -v jq >/dev/null 2>&1 || return 1
  # Note: the `as $pat` capture is essential — inside `test(...)`
  # the dot rebinds to test's input ($f, a string), so
  # `.file_pattern` would error with "Cannot index string". We
  # capture file_pattern in $pat first, then reference it inside
  # the test() argument.
  #
  # Exactly one filter is passed to jq: a second positional string
  # would be treated as an input filename and break every lookup.
  jq -e \
    --arg rm "$rule_module" \
    --arg rt "$rule_type" \
    --arg f "$target" \
    'any(.[];
      .rule_module == $rm and .type == $rt
      and (
        (.file? // null) == $f
        or (
          (.file_pattern? // null) as $pat
          | $pat != null
            and ($f | test(
              $pat
              | gsub("\\*\\*"; "DOUBLESTAR")
              | gsub("\\*"; "[^/]*")
              | gsub("DOUBLESTAR"; ".*")
              | "^" + . + "$"
            ))
        )
      ))' \
    .hypatia-baseline.json >/dev/null 2>&1
}

Expand Down
29 changes: 21 additions & 8 deletions scripts/apply-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,31 @@ EXPIRED_COUNT=$((EXPIRED_COUNT - ACTIVE_COUNT))
ANNOTATED="$(jq -n \
--argjson findings "$FINDINGS_JSON" \
--argjson baseline "$ACTIVE_BASELINE" '
# Two captures are essential here:
# `f as $finding` — without this, references like `f.file` inside the
# select() get re-evaluated against the current baseline entry (the
# re-bound `.`), not the finding. Binding $finding once captures the
# finding before we enter the map(select()) over the baseline.
#
# `(.file_pattern? // null) as $pat` — inside `test(arg)` the dot
# rebinds to the input of test ($finding.file, a string), so
# referencing `.file_pattern` there would error with "Cannot index
# string". Capture the entry pattern first, then reference $pat
# inside the test() regex argument.
def match_entry(f):
$baseline
f as $finding
| $baseline
| map(select(
.severity == f.severity
and .rule_module == f.rule_module
and .type == f.type
.severity == $finding.severity
and .rule_module == $finding.rule_module
and .type == $finding.type
and (
(.file? // null) == f.file
(.file? // null) == $finding.file
or (
.file_pattern? != null
and (f.file | test(
.file_pattern
(.file_pattern? // null) as $pat
| $pat != null
and ($finding.file | test(
$pat
| gsub("\\*\\*"; "DOUBLESTAR")
| gsub("\\*"; "[^/]*")
| gsub("DOUBLESTAR"; ".*")
Expand Down
89 changes: 89 additions & 0 deletions scripts/tests/apply-baseline-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# SPDX-License-Identifier: PMPL-1.0-or-later
#
# apply-baseline-test.sh — regression test for apply-baseline.sh's
# file_pattern glob handling.
#
# The original implementation referenced `.file_pattern` inside `test(...)`,
# where jq rebinds `.` to test's input (a string), causing a "Cannot index
# string" error that was silently masked by `select(...)`'s error-tolerance
# and produced an always-matches result. This test pins both the exact-file
# and glob paths so the regression cannot recur.
#
# Run: bash scripts/tests/apply-baseline-test.sh

# Strict mode: stop on unhandled errors, unset variables and failed
# pipeline stages.
set -o errexit -o nounset -o pipefail

# Locate the script under test relative to this file's own directory.
SCRIPT_DIR="$(
  cd "$(dirname "$0")" && pwd
)"
APPLY="${SCRIPT_DIR}/../apply-baseline.sh"

# Scratch directory for fixture files; removed on any exit path.
WORK="$(mktemp -d)"
trap 'rm -rf "$WORK"' EXIT

# Running tallies, updated by assert_status.
pass=0
fail=0

#######################################
# Runs the script under test in advisory mode and compares the number
# of suppressed vs kept findings in its JSON output with an expectation.
# Globals:
#   APPLY (read)  - command to run (the apply-baseline.sh path)
#   pass  (write) - incremented when the counts match
#   fail  (write) - incremented on a mismatch or a failed pipeline
# Arguments:
#   $1 - human-readable label for the case
#   $2 - path to the findings JSON file
#   $3 - path to the baseline JSON file
#   $4 - expected "<suppressed>,<kept>" counts, e.g. "1,0"
# Outputs:
#   One "PASS: ..." or "FAIL: ..." line on stdout.
# Returns:
#   0 always; the outcome is recorded in the pass/fail counters.
#######################################
assert_status() {
  local label="$1" findings="$2" baseline="$3" expected="$4"
  local got status=0
  # Record a failing pipeline instead of letting errexit/pipefail kill
  # the whole run: stderr is discarded here, so an abort at this point
  # would end the test silently, with no FAIL line and no totals.
  got=$("$APPLY" "$findings" "$baseline" advisory 2>/dev/null \
    | jq -r '"\(.findings_suppressed | length),\(.findings_kept | length)"') \
    || status=$?
  if [ "$status" -eq 0 ] && [ "$got" = "$expected" ]; then
    echo "PASS: $label (suppressed,kept=$got)"
    pass=$((pass + 1))
  elif [ "$status" -ne 0 ]; then
    echo "FAIL: $label expected=$expected got=$got (pipeline exit $status)"
    fail=$((fail + 1))
  else
    echo "FAIL: $label expected=$expected got=$got"
    fail=$((fail + 1))
  fi
}

# Fixture helper: prints a one-element JSON array holding a high-severity
# cicd_rules / banned_language_file entry whose key $1 (`file` or
# `file_pattern`) carries the value $2. Used for findings and baselines.
entry_json() {
  printf '[{"severity":"high","rule_module":"cicd_rules","type":"banned_language_file","%s":"%s"}]\n' \
    "$1" "$2"
}

# --- Case 1: a baseline entry naming the exact file suppresses it ---
entry_json file "src/Legacy.kt" > "$WORK/findings1.json"
entry_json file "src/Legacy.kt" > "$WORK/baseline1.json"
assert_status "exact file match suppresses" \
  "$WORK/findings1.json" "$WORK/baseline1.json" "1,0"

# --- Case 2: `examples/**` covers a file nested below examples/ ---
entry_json file "examples/kotlin/Nop.kt" > "$WORK/findings2.json"
entry_json file_pattern "examples/**" > "$WORK/baseline2.json"
assert_status "file_pattern matches nested file" \
  "$WORK/findings2.json" "$WORK/baseline2.json" "1,0"

# --- Case 3: the same pattern must leave an unrelated file alone
#     (guards against the always-matches regression caused by
#     referencing `.file_pattern` inside test()) ---
entry_json file "src/Unrelated.kt" > "$WORK/findings3.json"
assert_status "file_pattern does not over-match" \
  "$WORK/findings3.json" "$WORK/baseline2.json" "0,1"

# --- Case 4: a single `*` spans one path segment and stops at `/` ---
entry_json file "vendor/acme/legacy.java" > "$WORK/findings4a.json"
entry_json file "vendor/acme/deep/legacy.java" > "$WORK/findings4b.json"
entry_json file_pattern "vendor/*/legacy.java" > "$WORK/baseline4.json"
assert_status "single * matches one segment" \
  "$WORK/findings4a.json" "$WORK/baseline4.json" "1,0"
assert_status "single * does not cross slash" \
  "$WORK/findings4b.json" "$WORK/baseline4.json" "0,1"

# --- Case 5: with nothing in the baseline the finding is kept ---
printf '[]\n' > "$WORK/empty.json"
assert_status "empty baseline keeps finding" \
  "$WORK/findings2.json" "$WORK/empty.json" "0,1"

# Summary; the script's exit status is non-zero if any case failed.
printf '\n'
printf '%s\n' "Total: $pass passed, $fail failed"
test "$fail" -eq 0
Loading