#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Validates the http(s) URLs that appear in the files git knows about.
#
# Usage: scripts/check_urls.sh    (run from inside the repository)
#
# URLs are collected with `git grep`, grouped by file, and probed with curl.
# Successful probes are printed to stdout, failed ones to stderr.
#
# Exit status: 0 when every probed URL answered with a 2xx/3xx status code,
#              1 when at least one URL did not.

set -euo pipefail

# ANSI colour sequences, stored as real escape bytes so that they can be
# passed to printf as plain %s arguments instead of living in the format.
readonly GREEN=$'\033[1;32m'
readonly RED=$'\033[1;31m'
readonly CYAN=$'\033[1;36m'
readonly YELLOW=$'\033[1;33m'
readonly RESET=$'\033[0m'

#######################################
# Normalizes and filters "path:url" records.
# Strips trailing punctuation (sentence dots, closing quotes, slashes, ...),
# drops exact duplicate records while keeping first-seen order, and drops
# URLs that point at the local machine.
# Arguments: none
# Inputs:    "path:url" records on stdin, one per line
# Outputs:   the surviving records on stdout
# Returns:   always 0
#######################################
filter_urls() {
  # The host must be terminated by ':', '/', '?', '#' or end of line. The
  # end-of-line case matters because the sed step above has already removed
  # a trailing '/', turning "http://localhost/" into "http://localhost".
  # grep -v exits 1 when it selects nothing; that is not an error here.
  sed 's/[[:punct:]]*$//' \
    | awk '!seen[$0]++' \
    | grep -Ev '://(0\.0\.0\.0|127\.0\.0\.1|localhost)([:/?#]|$)' \
    || true
}

#######################################
# Lists every candidate URL in the repository.
# Skips dot-files, lock files, SVG/XML files and third-party directories.
# Arguments: none
# Outputs:   "path:url" records on stdout, one per line
#######################################
extract_urls() {
  git --no-pager grep --no-color -I -o -E \
    'https?://[^[:space:]<>\")\{\(\$]+' \
    -- '*' \
    ':(exclude).*' \
    ':(exclude)**/.*' \
    ':(exclude)**/*.lock' \
    ':(exclude)**/*.svg' \
    ':(exclude)**/*.xml' \
    ':(exclude)**/third-party/**' \
    | filter_urls
}

#######################################
# Probes one URL and reports the final HTTP status code.
# Sends a HEAD request first; when the server answers with an error status
# it retries with a one-byte ranged GET and a browser User-Agent.
# Arguments: $1 - URL to probe
# Outputs:   three-digit status code on stdout; 000 when curl itself failed
# Returns:   always 0
#######################################
probe_url() {
  local url=$1
  local code
  code=$(curl -gsLm30 -o /dev/null -w '%{http_code}' -I "$url") || code=000
  # `[` compares as plain decimal, so "000" is safe here.
  if [ "$code" -ge 400 ]; then
    code=$(curl -gsLm30 -o /dev/null -w '%{http_code}' \
      -r 0-0 -A 'Mozilla/5.0' "$url") || code=000
  fi
  printf '%s' "$code"
}

#######################################
# Tells whether an HTTP status code counts as reachable (2xx or 3xx).
# Arguments: $1 - numeric status code
# Returns:   0 when 200 <= code < 400, non-zero otherwise
#######################################
is_success_code() {
  [ "$1" -ge 200 ] && [ "$1" -lt 400 ]
}

#######################################
# Checks every extracted URL and prints a per-file report.
# Arguments: none
# Outputs:   reachable URLs on stdout, unreachable URLs on stderr
# Returns:   0 when all URLs are reachable, 1 otherwise
#######################################
main() {
  local status=0
  local last_filepath=''
  local filepath url code

  # `read` puts everything after the first ':' into url, so the "://" of
  # the URL itself survives the split.
  while IFS=: read -r filepath url; do
    if [ "$filepath" != "$last_filepath" ]; then
      printf '\n%s:\n' "$filepath"
      last_filepath=$filepath
    fi
    code=$(probe_url "$url")
    if is_success_code "$code"; then
      printf '%s%s%s %s%s%s\n' \
        "$GREEN" "$code" "$RESET" "$CYAN" "$url" "$RESET"
    else
      printf '%s%s%s %s%s%s\n' \
        "$RED" "$code" "$RESET" "$YELLOW" "$url" "$RESET" >&2
      status=1
    fi
  done < <(extract_urls)

  return "$status"
}

# Last command of the script: its return value becomes the exit status.
main