Skip to content

Commit c920ced

Browse files
author
pixelead0
committed
Refactor link checking script to improve internal link handling
1 parent ea9c5a1 commit c920ced

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

scripts/check_links.py

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def find_internal_links(content):
4848

4949

5050
def resolve_link_url(base_url, md_file, link_url):
51-
"""Resolve the real URL as a browser would from the markdown file location."""
51+
"""Resolve the real URL as a browser would from the markdown file."""
5252
# If link is absolute (starts with /), join with base_url
5353
if link_url.startswith("/"):
5454
return urljoin(base_url, link_url)
@@ -69,29 +69,33 @@ def check_link(base_url, link_url, current_file):
6969
try:
7070
# Handle anchor links - they should resolve from current page
7171
if link_url.startswith("#"):
72-
# Build URL from current file path
73-
file_path = current_file.replace(".md", "/")
72+
# Build URL from current file path, converting .md to .html
73+
file_path = current_file.replace(".md", ".html")
7474
if not file_path.startswith("/"):
7575
file_path = "/" + file_path
7676
full_url = urljoin(base_url, file_path + link_url)
7777
else:
78+
# Convert .md URLs to .html URLs for checking
79+
check_url = link_url
80+
if ".md" in check_url:
81+
check_url = check_url.replace(".md", ".html")
7882
# For relative links, resolve from current file's directory
79-
if not link_url.startswith("/"):
83+
if not check_url.startswith("/"):
8084
# Get current file's directory
8185
current_dir = str(Path(current_file).parent)
8286
if current_dir != ".":
8387
# Resolve relative to current directory
84-
resolved_path = str(Path(current_dir) / link_url)
88+
resolved_path = str(Path(current_dir) / check_url)
8589
else:
86-
resolved_path = link_url
90+
resolved_path = check_url
8791

8892
# Convert to URL format
8993
if not resolved_path.startswith("/"):
9094
resolved_path = "/" + resolved_path
9195
full_url = urljoin(base_url, resolved_path)
9296
else:
9397
# Absolute path from site root
94-
full_url = urljoin(base_url, link_url)
98+
full_url = urljoin(base_url, check_url)
9599

96100
# Make request
97101
response = requests.get(full_url, timeout=5)
@@ -132,12 +136,15 @@ def main():
132136

133137
# Calculate full URL for display
134138
if not url.startswith("#"):
135-
full_url = urljoin(base_url, url)
139+
# Convert .md URLs to .html URLs for display
140+
display_url = url
141+
if ".md" in display_url:
142+
display_url = display_url.replace(".md", ".html")
143+
full_url = urljoin(base_url, display_url)
136144
else:
137-
full_url = urljoin(
138-
base_url,
139-
str(md_file.relative_to(docs_dir)).replace(".md", "/") + url,
140-
)
145+
file_path = str(md_file.relative_to(docs_dir))
146+
file_path = file_path.replace(".md", ".html")
147+
full_url = urljoin(base_url, file_path + url)
141148

142149
result = {
143150
"file": str(md_file.relative_to(docs_dir)),
@@ -154,7 +161,7 @@ def main():
154161
else:
155162
broken_links.append(result)
156163

157-
except Exception as e:
164+
except (IOError, OSError) as e:
158165
print(f"❌ Error reading {md_file}: {e}")
159166

160167
# Print summary
@@ -182,10 +189,10 @@ def main():
182189

183190
# Show some broken links in console
184191
if broken_links:
185-
print(f"\n🔴 BROKEN LINKS (showing first 10):")
192+
print("\n🔴 BROKEN LINKS (showing first 10):")
186193
print("-" * 50)
187194
for link in broken_links[:10]:
188-
print(f"📄 {link['file']}:{link['line']}")
195+
print("📄 {}:{}".format(link["file"], link["line"]))
189196
print(f" Text: {link['text']}")
190197
print(f" URL: {link['url']}")
191198
print(f" Full URL: {link['full_url']}")

0 commit comments

Comments
 (0)