Skip to content

Commit 72c8b27

Browse files
feat: script to find dead links
1 parent 7713da2 commit 72c8b27

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

bin/find_dead_links

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python3
2+
3+
import multiprocessing
4+
import os
5+
import re
6+
from glob import glob
7+
from typing import List
8+
9+
import requests
10+
11+
# Repository root; this script lives in <root>/bin, so go up two levels
# from this file's absolute path.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# The documentation tree that is scanned for Markdown files.
DOCS_DIR = os.path.join(PROJECT_DIR, "docs")
# Placeholder/example domains that appear in the docs but are never
# expected to resolve; links containing these are skipped entirely.
FAKE_DOMAINS = [
    "example.com",
    "yourdomain.com",
    "example.hypernode.io",
    "hypernode.local",
    "127.0.0.1",
]
20+
21+
22+
def get_links(md_file: str) -> List[str]:
    """Extract all http(s) URLs from Markdown-style links in *md_file*.

    Returns the URLs in the order they appear in the file. The link-text
    part of the pattern is non-greedy (``.*?``): with a greedy ``.*``,
    a line containing several ``[text](url)`` links would match only the
    last one, because the greedy part swallows every earlier link.
    """
    with open(md_file) as f:
        content = f.read()
    return re.findall(r"\[.*?\]\((http.+?)\)", content)
26+
27+
28+
def is_link_is_dead(link: str) -> bool:
    """Fetch *link* and report whether it looks dead.

    A link counts as dead when the HTTP request fails outright (connection
    error, timeout, …) or when the server answers with one of a small set
    of error status codes.
    """
    try:
        response = requests.get(link, timeout=5)
    except Exception as e:
        # Any failure to fetch at all (DNS, TLS, timeout) is treated as dead.
        print(f"Couldn't get {link}: {e}")
        return True
    if response.status_code not in [404, 500, 502, 503, 504]:
        return False
    print(f"Dead link: {link}")
    return True
38+
39+
40+
def main():
    """Scan every Markdown file under DOCS_DIR and check its links.

    Collects the unique set of external links (skipping placeholder
    domains), checks them concurrently, and exits with status 1 when any
    dead link is found so CI pipelines can fail the build. Previously the
    results of the pool were discarded and the script always exited 0.
    """
    links = set()
    for md_file in glob(f"{DOCS_DIR}/**/*.md", recursive=True):
        for link in get_links(md_file):
            # Placeholder domains (example.com etc.) never resolve; skip them.
            if not any(fake_domain in link for fake_domain in FAKE_DOMAINS):
                links.add(link)
    # Sort for deterministic, reproducible output across runs.
    ordered_links = sorted(links)
    print(f"Found {len(ordered_links)} unique links")
    # HTTP checks are I/O bound; a small pool speeds things up without
    # hammering any single host.
    with multiprocessing.Pool(4) as p:
        results = p.map(is_link_is_dead, ordered_links)
    if any(results):
        # Non-zero exit so callers (e.g. CI) notice dead links.
        raise SystemExit(1)
51+
52+
53+
# Entry point guard: only run the link check when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)