Skip to content

Commit a5e03e2

Browse files
Merge pull request #604 from gaurav-nelson/add-vale-script
Add script to run Vale linting on PRs
2 parents d1b1890 + 8bf913a commit a5e03e2

File tree

1 file changed

+215
-0
lines changed

1 file changed

+215
-0
lines changed

utils/vale-pr-comments.sh

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
#!/usr/bin/env bash
2+
# vale-pr-comments.sh
3+
# Run Vale once over all changed .adoc/.md files under specific paths and post a single GitHub PR comment.
4+
# Only checks for updated AsciiDoc and Markdown files under:
5+
# - content/learn/*
6+
# - content/patterns/*
7+
# - content/contribute/*
8+
# - modules/*
9+
# Always compares HEAD~1..HEAD (last commit).
10+
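# Requires: bash 4+ (mapfile, associative arrays), git, jq, curl, vale.
# Environment: GITHUB_AUTH_TOKEN (required), PULL_NUMBER (optional; when it is unset the generated comment is printed instead of being posted).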
11+
12+
# Script configuration. Every value is readonly so that an accidental
# reassignment further down fails loudly instead of silently changing
# which repository, PR or diff range the script works on.
readonly MAX_COMMENT_BYTES=65536              # size cap applied to the generated comment body
readonly GITHUB_API="https://api.github.com"  # base URL for the GitHub REST calls
readonly REPO="validatedpatterns/docs"        # owner/name segment used in the API paths
readonly PR_NUMBER="${PULL_NUMBER:-}"         # empty when PULL_NUMBER is unset or empty
readonly BASE_SHA="HEAD~1"                    # diff range start: parent of the last commit
readonly HEAD_SHA="HEAD"                      # diff range end: the last commit
18+
19+
# Logging helpers. All of them join their arguments with spaces ("$*").
#   log  MSG...  timestamped (UTC, ISO-8601) line on stdout.
#   warn MSG...  same line format, prefixed "WARN: ", written to stderr.
#   err  MSG...  same line format, prefixed "ERROR: ", written to stderr.
#   die  MSG...  reports through err, then exits the script with status 1.
# Warnings and errors go to stderr so they never mix with data written to
# stdout (for example the comment body printed when no PR number is set).
log() {
  printf '[%s] %s\n' "$(date -u +'%Y-%m-%dT%H:%M:%SZ')" "$*"
}

warn() {
  log "WARN: $*" >&2
}

err() {
  log "ERROR: $*" >&2
}

die() {
  err "$*"
  exit 1
}
23+
24+
# Prerequisites: stop immediately when an external tool or the API token
# is missing, before any git or network work is attempted.
for tool in git jq curl vale; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    die "Required command not found: $tool"
  fi
done

if [[ -z "${GITHUB_AUTH_TOKEN:-}" ]]; then
  die "GITHUB_AUTH_TOKEN must be set"
fi
29+
30+
log "Collecting changed files between $BASE_SHA and $HEAD_SHA..."
# --diff-filter=AM restricts the listing to added and modified paths.
mapfile -t all_changed < <(git diff --name-only --diff-filter=AM "$BASE_SHA" "$HEAD_SHA" || true)

# Keep only AsciiDoc/Markdown files located under one of the target directories.
declare -a target_files
for changed_path in "${all_changed[@]:-}"; do
  # Extension gate first: anything that is neither .adoc nor .md is ignored.
  if [[ "$changed_path" != *.adoc && "$changed_path" != *.md ]]; then
    continue
  fi
  case "$changed_path" in
    content/learn/* | content/patterns/* | content/contribute/* | modules/*)
      target_files+=("$changed_path")
      ;;
  esac
done

if (( ${#target_files[@]} == 0 )); then
  log "No relevant .adoc/.md files modified in target paths. Exiting."
  exit 0
fi
log "Files to check: ${#target_files[@]}"
50+
51+
# Build changed lines map for filtering later.
# changed_lines_map[FILE] holds the space-separated, ascending, de-duplicated
# line numbers (new side of the diff) touched between $BASE_SHA and $HEAD_SHA.
declare -A changed_lines_map

# Unified-diff hunk header, e.g. "@@ -10,2 +12,3 @@". Capture group 2 is the
# new-side start line, group 4 the optional new-side line count. Anchoring on
# the full header keeps a "+<digits>" in the trailing context text from being
# mistaken for the range.
hunk_re='^@@ -[0-9]+(,[0-9]+)? \+([0-9]+)(,([0-9]+))? @@'

for file in "${target_files[@]}"; do
  if [[ ! -f "$file" ]]; then
    warn "File $file missing; skipping"
    continue
  fi

  # -U0 drops context lines, so each hunk covers exactly the changed lines.
  mapfile -t raw_hunks < <(git diff -U0 "$BASE_SHA" "$HEAD_SHA" -- "$file" | awk '/^@@/ {print $0}' || true)
  (( ${#raw_hunks[@]} > 0 )) || continue

  lines_for_file=()
  for h in "${raw_hunks[@]}"; do
    if ! [[ "$h" =~ $hunk_re ]]; then
      warn "Unexpected hunk header ($h) for $file"
      continue
    fi
    start=${BASH_REMATCH[2]}
    count=${BASH_REMATCH[4]:-1} # ",count" is omitted when the hunk is one line long
    # A count of 0 is a pure deletion with no new-side lines. The arithmetic
    # loop then runs zero times on every platform, whereas `seq start end`
    # with end < start counts downward on BSD/macOS.
    for (( ln = start; ln < start + count; ln++ )); do
      lines_for_file+=("$ln")
    done
  done

  if (( ${#lines_for_file[@]} > 0 )); then
    # Join with explicit spaces instead of relying on (and then unsetting) IFS.
    joined=$(printf '%s\n' "${lines_for_file[@]}" | sort -n -u | tr '\n' ' ')
    changed_lines_map["$file"]=${joined% }
  fi
done

if (( ${#changed_lines_map[@]} == 0 )); then
  log "No changed lines found; nothing to lint. Exiting."
  exit 0
fi
80+
81+
# Run Vale once for all target files.
log "Running Vale for all target files..."

# Scratch files come from mktemp: the names are unpredictable and the files
# are created exclusively, so concurrent runs cannot clobber each other and a
# pre-planted file or symlink in the temp directory cannot be written through.
# VALE_OUT receives Vale's raw JSON report; TMP_ND is created empty here and
# is filled later with one issue object per line.
VALE_OUT="$(mktemp "${TMPDIR:-/tmp}/vale_combined.XXXXXX")" \
  || die "Could not create a temporary file for Vale output"
TMP_ND="$(mktemp "${TMPDIR:-/tmp}/vale_issues_nd.XXXXXX")" \
  || die "Could not create a temporary file for the issue list"
# Safety net: remove both scratch files on every exit path, including errors.
# The variables are expanded when the trap fires, not when it is installed.
trap 'rm -f -- "$VALE_OUT" "$TMP_ND"' EXIT

# Vale's stderr is discarded; only the JSON written to stdout is used.
if ! vale --output=JSON --no-exit --minAlertLevel=error "${target_files[@]}" >"$VALE_OUT" 2>/dev/null; then
  warn "vale exited nonzero; attempting to parse output if present"
fi

# found_any records whether there is any report to parse (1) or not (0).
if [[ -s "$VALE_OUT" ]]; then
  found_any=1
else
  log "Vale produced no output; no issues found."
  rm -f -- "$VALE_OUT"
  found_any=0
fi
99+
100+
# Flatten Vale's report into one compact JSON object per line in $TMP_ND.
# The filter expects an object keyed by file path whose values are arrays of
# alerts, and keeps only the fields the comment needs.
if [[ "$found_any" -eq 1 ]]; then
  if ! jq -c 'to_entries[] | .key as $file | .value[] | {file: $file, line: .Line, message: (.Message // .Match // ""), check: (.Check // ""), severity: (.Severity // ""), link: (.Link // "")}' "$VALE_OUT" >>"$TMP_ND" 2>/dev/null; then
    # Previously swallowed silently, which made a broken report look clean.
    warn "Could not convert Vale output with jq; issues may be missing from the report"
  fi
fi

# Literal paths: inside single quotes "$." is not an expansion. The comment
# builder call and the final cleanup refer to this same issues file name.
issues_json='/tmp/vale_issues_$.json'
filtered_json='/tmp/vale_issues_$.filtered.json'

if [[ ! -s "$TMP_ND" ]]; then
  log "No Vale issues found in output."
  ISSUE_COUNT=0
else
  # Slurp the per-line objects into a single JSON array.
  jq -s '.' "$TMP_ND" >"$issues_json"

  # changed_json maps each file path to the JSON array of its changed line
  # numbers (as numbers, so they compare directly against .line).
  changed_json='{}'
  for f in "${!changed_lines_map[@]}"; do
    changed_json=$(jq --arg path "$f" --arg lines "${changed_lines_map[$f]}" \
      '. + {($path): ($lines | split(" ") | map(select(length > 0) | tonumber))}' \
      <<<"$changed_json")
  done

  # Filter to issues on changed lines only.
  # .line is bound to $ln BEFORE piping into the array of changed lines: after
  # that pipe "." is the array, so referring to .line there raises a jq error
  # and the filter would never succeed.
  if ! jq --argjson changed "$changed_json" '
      [ .[]
        | select(type == "object")
        | select((.line | type) == "number")
        | select(.line as $ln | ($changed[.file] // []) | any(.[]; . == $ln))
      ]' "$issues_json" >"$filtered_json"; then
    warn "jq filtering failed; dumping offending JSON for inspection (first 200 lines):"
    head -n 200 "$issues_json" || true
    # Fall back to no filtering to avoid hard failure
    cp "$issues_json" "$filtered_json" || true
  fi
  mv "$filtered_json" "$issues_json" || true
  ISSUE_COUNT=$(jq 'length' "$issues_json" || echo 0)
fi

log "Issues on changed lines: ${ISSUE_COUNT:-0}"
132+
133+
#######################################
# Print the Markdown body of the Vale PR comment.
# Arguments:
#   $1 - heading line, printed first.
#   $2 - path to a JSON array of issue objects with the keys file, line,
#        message, check, severity and link. Defaults to the literal path
#        /tmp/vale_issues_$.json.
# Outputs:
#   stdout: a "resolved" notice when $2 is not a regular file, cannot be
#   counted by jq, or holds an empty array; otherwise the issue total
#   followed by one bullet list per file.
# Returns:
#   0 in the "resolved" case; otherwise the status of the final echo.
#######################################
build_comment() {
  local heading="$1"
  local input="${2:-/tmp/vale_issues_\$.json}"
  local total=0

  if [[ -f "$input" ]]; then
    total=$(jq 'length' "$input" 2>/dev/null) || total=0
  fi
  # jq prints nothing for an empty file; anything that is not a plain
  # non-negative integer is treated as "no issues" instead of being printed.
  [[ "$total" =~ ^[0-9]+$ ]] || total=0

  printf '%s\n\n' "$heading"

  if (( 10#$total == 0 )); then
    printf '%s\n' \
      "All Vale issues in the modified lines have been resolved." \
      "" \
      "<details><summary>Previous comment (kept collapsed)</summary>" \
      "" \
      "_Previous Vale content preserved._" \
      "" \
      "</details>" \
      "" \
      "---" \
      "" \
      "*This comment was automatically generated by Vale.*"
    return 0
  fi

  printf '%s\n\n' "Vale found **$total** issue(s) in the modified lines of this PR."
  # One "#### <file>" section per file, one bullet per issue: optional line
  # number, message with newlines flattened to spaces, optional check name
  # (linked when a link is present) and optional severity.
  jq -r '
    group_by(.file)
    | map(
        "#### " + (.[0].file) + "\n" +
        (map(
          "- " +
          (if has("line") and (.line!=null) then ("**Line " + (.line|tostring) + "**: ") else "" end) +
          (.message | gsub("\\n"; " ")) +
          (if (.check != "") then
            (if (.link != "") then (" [`[" + .check + "]`](" + .link + ")") else (" `[" + .check + "]`") end)
          else "" end) +
          (if (.severity != "") then (" (*" + .severity + "*)") else "" end)
        ) | join("\n"))
      )
    | join("\n\n")' "$input"
  echo
  echo "---"
  echo
  echo "*This comment was automatically generated by Vale on the modified lines.*"
}
161+
162+
# Render the comment. With at least one issue the filtered issues file is the
# input; otherwise /dev/null is passed, which is not a regular file and so
# makes build_comment emit its "resolved" notice.
HEADING="### 📝 Vale Linting Results"
comment_source="/dev/null"
if [ "${ISSUE_COUNT:-0}" -gt 0 ]; then
  comment_source='/tmp/vale_issues_$.json'
fi
comment_body="$(build_comment "$HEADING" "$comment_source")"
168+
169+
# Enforce the comment size cap. A body at or above the cap is replaced by its
# first 3000 bytes followed by a truncation notice; the nested check is a
# last-resort guard in case the shortened text were still at or above the cap.
byte_len=$(printf '%s' "$comment_body" | wc -c)
if [ "$byte_len" -ge "$MAX_COMMENT_BYTES" ]; then
  warn "Comment size $byte_len exceeds $MAX_COMMENT_BYTES; truncating."
  suffix=$'\n\n---\n\n[Comment truncated due to size]\n\n*Run Vale locally for full report.*'
  prefix=$(printf '%s' "$comment_body" | head -c 3000)
  comment_body="${prefix}${suffix}"

  byte_len=$(printf '%s' "$comment_body" | wc -c)
  if [ "$byte_len" -ge "$MAX_COMMENT_BYTES" ]; then
    hard_limit=$((MAX_COMMENT_BYTES - 100))
    comment_body=$(
      printf '%s' "$comment_body" | head -c "$hard_limit"
      printf '\n\n[truncated]\n'
    )
  fi
fi
180+
181+
# Look for a comment previously posted by this script: page through the PR's
# issue comments (100 per request) and take the id of the first one whose
# body contains the heading marker. Stops on an empty or "null" response, on
# the first match, or on a page that is not full.
EXISTING_COMMENT_ID=""
if [[ -n "$PR_NUMBER" && -n "$REPO" ]]; then
  per_page=100
  page=1
  while true; do
    comments_url="$GITHUB_API/repos/$REPO/issues/$PR_NUMBER/comments?per_page=$per_page&page=$page"
    resp=$(curl -sS \
      -H "Authorization: Bearer $GITHUB_AUTH_TOKEN" \
      -H "Accept: application/vnd.github+json" \
      "$comments_url")
    case "$resp" in
      "" | null) break ;;
    esac

    EXISTING_COMMENT_ID=$(jq -r --arg marker "### 📝 Vale Linting Results" \
      '.[] | select(.body | contains($marker)) | .id' <<<"$resp" | head -n1 || true)
    count=$(jq 'length' <<<"$resp" || echo 0)

    if [ -n "$EXISTING_COMMENT_ID" ] || [ "$count" -lt "$per_page" ]; then
      break
    fi
    page=$((page + 1))
  done
fi
193+
194+
# send_comment METHOD URL
# Sends $comment_body as a {"body": ...} JSON payload with the given HTTP
# method and prints only the resulting HTTP status code.
send_comment() {
  curl -sS -o /dev/null -w "%{http_code}" -X "$1" \
    -H "Authorization: Bearer $GITHUB_AUTH_TOKEN" \
    -H "Accept: application/vnd.github+json" \
    "$2" \
    -d "$(jq -nc --arg b "$comment_body" '{body:$b}')"
}

# Delivery: print when no PR/repo is known, update the existing comment when
# one was found, create a new one only when there are issues to report.
if [ -z "$PR_NUMBER" ] || [ -z "$REPO" ]; then
  log "PR or repo missing; printing generated comment:"
  printf '%s\n' "$comment_body"
elif [ -n "$EXISTING_COMMENT_ID" ]; then
  log "Updating comment id=$EXISTING_COMMENT_ID"
  status=$(send_comment PATCH "$GITHUB_API/repos/$REPO/issues/comments/$EXISTING_COMMENT_ID")
  if [ "$status" -ge 200 ] && [ "$status" -lt 300 ]; then
    log "Updated (HTTP $status)"
  else
    warn "Failed to update (HTTP $status)"
  fi
elif [ "${ISSUE_COUNT:-0}" -gt 0 ]; then
  log "Creating new comment"
  status=$(send_comment POST "$GITHUB_API/repos/$REPO/issues/$PR_NUMBER/comments")
  if [ "$status" -ge 200 ] && [ "$status" -lt 300 ]; then
    log "Created (HTTP $status)"
  else
    warn "Failed to create (HTTP $status)"
  fi
else
  log "No issues and no existing comment — nothing to post."
fi

# Remove the scratch files; the script reports success regardless of the
# outcome of the API call above.
rm -f "$VALE_OUT" "$TMP_ND" '/tmp/vale_issues_$.json' || true
log "Done."
exit 0

0 commit comments

Comments
 (0)