-
Notifications
You must be signed in to change notification settings - Fork 297
91 lines (81 loc) · 4.24 KB
/
pr-link-scan.yml
File metadata and controls
91 lines (81 loc) · 4.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Workflow header: runs on pull requests targeting the listed branches and
# only needs read access to the repository contents.
name: Check hyperlinks and relative path validity

permissions:
  contents: read

on:
  pull_request:
    branches: ["master", "master_next"]
    types: [opened, reopened, ready_for_review, synchronize]

# If there is a new commit, the previous jobs will be canceled
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Probes every http(s) Markdown link found in the .md files that the pull
  # request adds, renames or modifies, and fails the job if any link does not
  # answer with HTTP 200 after three attempts.
  check-the-validity-of-hyperlinks-in-README:
    # choose different runner label for internal and external CI
    runs-on: ${{ contains(github.repository, 'intel-innersource') && 'self-hosted' || 'ubuntu-latest' }}
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf "${{github.workspace}}"/*

      - name: Checkout Repo
        uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2
        with:
          # Fetch full history; the link check below diffs against the PR
          # base commit.
          fetch-depth: 0

      - name: Check the Validity of Hyperlinks
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          cd "${{github.workspace}}"
          export no_proxy="localhost,127.0.0.1"
          curl_timeout=10
          delay=1
          fail="FALSE"
          # Request headers that simulate a browser.
          browser_opts=(
            -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
            -H "Accept-Language: en-US,en;q=0.5"
          )
          merged_commit=$(git log -1 --format='%H')
          # --name-status output is tab-separated and the last field is the
          # current path (the new name for renames). Splitting on tabs keeps
          # paths that contain spaces intact.
          changed_files="$(git diff --name-status --diff-filter=ARM "$BASE_SHA" "${merged_commit}" | awk -F'\t' '/\.md$/ {print $NF}')"
          echo "no_proxy=$no_proxy"
          echo "http_proxy=$http_proxy"
          echo "https_proxy=$https_proxy"
          if [ -n "$changed_files" ]; then
            # Read one path per line instead of word-splitting the list.
            while IFS= read -r changed_file; do
              [ -n "$changed_file" ] || continue
              # grep exits non-zero when nothing matches; `|| true` keeps the
              # step alive. -h: no file-name prefix, the path is already known.
              url_lines=$(grep -h -Eo '\]\(http[s]?://[^)]+\)' -- "$changed_file") || true
              if [ -n "$url_lines" ]; then
                # grep -o prints one match per line; reading line by line keeps
                # a link with a title, e.g. ](https://x "Title"), in one piece.
                while IFS= read -r url_line; do
                  [ -n "$url_line" ] || continue
                  # Take the text between "(" and ")", drop an optional link
                  # title after the URL, then drop a trailing ".git".
                  url=$(printf '%s\n' "$url_line" | cut -d '(' -f2 | cut -d ')' -f1 | sed -E 's/[[:space:]].*$//; s/\.git$//')
                  if [[ "$url" == "https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html" || "$url" == "https://ai.cloud.intel.com/" ]]; then
                    echo "Link $url from ${{github.workspace}}/$changed_file needs to be verified by real person."
                  else
                    sleep $delay
                    # Attempt 1: follow redirects, browser-like headers.
                    response=$(curl --max-time "${curl_timeout}" -L -s -o /dev/null -w "%{http_code}" "${browser_opts[@]}" "$url") || true
                    # String comparison: an empty status must count as a
                    # failure instead of raising a test(1) integer error.
                    if [ "$response" != "200" ]; then
                      echo "**********Validation $url failed ($response), try again**********"
                      # Attempt 2: plain curl.
                      # NOTE(review): attempts 2 and 3 do not pass -L, so a
                      # redirecting URL cannot pass them; confirm this is
                      # intended.
                      response_retry=$(curl --max-time "${curl_timeout}" -s -o /dev/null -w "%{http_code}" "$url") || true
                      if [ "$response_retry" = "200" ]; then
                        echo "*****Retry successfully*****"
                      else
                        echo "******Retry $url failed ($response_retry), add simulated browser requests******"
                        # Attempt 3: browser-like headers again.
                        response_browser=$(curl --max-time "${curl_timeout}" -s -o /dev/null -w "%{http_code}" "${browser_opts[@]}" "$url") || true
                        if [ "$response_browser" = "200" ]; then
                          echo "*****Retry successfully*****"
                        else
                          # Report the status of the last attempt.
                          echo -e "::error:: Invalid link ($response_browser) from ${{github.workspace}}/$changed_file: $url"
                          fail="TRUE"
                        fi
                      fi
                    fi
                  fi
                done <<< "$url_lines"
              fi
            done <<< "$changed_files"
          else
            echo "No changed .md file."
          fi
          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          else
            echo "All hyperlinks are valid."
          fi
        shell: bash