
Commit f771497

Merge pull request #77 from ByteInternet/find-dead-links
2 parents: 309d47d + 13f3550

File tree

1 file changed: +53 −0 lines


bin/find_dead_links

Lines changed: 53 additions & 0 deletions
```python
#!/usr/bin/env python3
"""Find dead links in the markdown files under DOCS_DIR."""

import multiprocessing
import re
from glob import glob
from typing import List

import requests

from hypernode.common.settings import DOCS_DIR

# Placeholder domains used in documentation examples; they are not
# expected to resolve, so links to them are skipped.
FAKE_DOMAINS = [
    "example.com",
    "yourdomain.com",
    "example.hypernode.io",
    "hypernode.local",
    "127.0.0.1",
]


def get_links(md_file: str) -> List[str]:
    """Return every http(s) URL used in a markdown link in the file."""
    with open(md_file) as f:
        content = f.read()
    # [^\]]* and [^)]+ keep each match within a single link, so a line
    # containing several links yields all of them.
    return re.findall(r"\[[^\]]*\]\((http[^)]+)\)", content)


def is_link_dead(link: str) -> bool:
    """Fetch the link and report whether it looks dead."""
    try:
        resp = requests.get(link, timeout=5)
    except Exception as e:
        # Timeouts and connection errors count as dead too.
        print(f"Couldn't get {link}: {e}")
        return True
    dead = resp.status_code in [404, 500, 502, 503, 504]
    if dead:
        print(f"Dead link: {link}")
    return dead


def main():
    links = []
    for md_file in glob(f"{DOCS_DIR}/**/*.md", recursive=True):
        for link in get_links(md_file):
            if not any(fake_domain in link for fake_domain in FAKE_DOMAINS):
                links.append(link)
    # Deduplicate so each URL is fetched only once.
    links = list(set(links))
    print(f"Found {len(links)} unique links")
    # Check the links concurrently in a pool of four worker processes.
    with multiprocessing.Pool(4) as p:
        p.map(is_link_dead, links)


if __name__ == "__main__":
    main()
```
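For a quick sanity check of the extraction pattern, the same regex can be run on an invented markdown snippet (the sample text and its URLs are illustrative, not taken from the docs):

```python
import re

# The pattern get_links uses, applied to a made-up sample line.
PATTERN = r"\[[^\]]*\]\((http[^)]+)\)"
sample = "Read [the docs](https://docs.hypernode.com/) or [a guide](http://example.com/guide)."
print(re.findall(PATTERN, sample))
# -> ['https://docs.hypernode.com/', 'http://example.com/guide']
```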

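Note that the `FAKE_DOMAINS` check is a plain substring match on the whole URL, so a placeholder domain is caught wherever it appears in a link. A minimal illustration with invented URLs:

```python
FAKE_DOMAINS = ["example.com", "yourdomain.com", "example.hypernode.io", "hypernode.local", "127.0.0.1"]

links = [
    "https://docs.hypernode.com/page",  # kept: real-looking docs link
    "https://example.com/placeholder",  # dropped: placeholder domain
    "http://127.0.0.1:8080/admin",      # dropped: local address
]
kept = [link for link in links if not any(domain in link for domain in FAKE_DOMAINS)]
print(kept)  # ['https://docs.hypernode.com/page']
```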

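`Pool.map` returns the booleans from `is_link_dead` in input order, but the script discards them. If the check were ever wired into CI, those results could be collected to produce a nonzero exit status. A sketch under that assumption (the `find_dead_links` helper and the `always_dead` stand-in are hypothetical, not part of this commit):

```python
import multiprocessing
import sys


def always_dead(link: str) -> bool:
    # Stand-in for is_link_dead so the sketch runs without network access.
    return True


def find_dead_links(links, checker):
    # Pool.map preserves input order, so results zip back onto the inputs.
    with multiprocessing.Pool(4) as p:
        results = p.map(checker, links)
    return [link for link, dead in zip(links, results) if dead]


if __name__ == "__main__":
    dead = find_dead_links(["https://example.invalid/"], always_dead)
    sys.exit(1 if dead else 0)  # nonzero exit makes this usable as a CI gate
```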