-
Notifications
You must be signed in to change notification settings - Fork 8
Open
Description
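There are already some redirects in place. Ideally, each redirect page should include tags (e.g. `<meta http-equiv="refresh">` and `<link rel="canonical">`) that prompt crawlers to update their index when they hit the old URLs. We may also want to update the new site's 404.html to handle any cases the redirects miss.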
An automatic redirect generator script like the one below could take care of this quickly, with just a bit of manual editing to set the redirect targets. The following code is AI-generated and untested.
import os
from pathlib import Path
# --- CONFIGURATION ---
# Adjust these values for your own sites before running the script.
# 1. Path to your old website files (e.g., 'C:/Users/You/Desktop/OldWebsite').
#    This directory is scanned recursively for '*.html' files.
OLD_SITE_DIR = 'old_site_content'
# 2. Path to your new website files (e.g., 'C:/Users/You/Desktop/NewWebsite').
#    This is where the redirect files and 'redirect_summary.md' will be created;
#    the directory is created if it does not exist yet.
NEW_SITE_DIR = 'new_site_content'
# 3. The base URL for your *NEW* site (e.g., 'https://mynewdomain.github.io').
#    A trailing slash is optional: it is stripped wherever the value is used.
NEW_BASE_URL = 'https://mynewdomain.github.io'
# 4. The URL that every generated redirect points to by default (usually the
#    homepage): the base URL normalized to end in exactly one '/'.
DEFAULT_TARGET_URL = NEW_BASE_URL.rstrip('/') + '/'
# --- HTML TEMPLATE ---
def generate_redirect_html(target_url, old_relative_path, new_base_url):
    """Return the HTML source of a static page that redirects to *target_url*.

    The page carries a zero-delay ``<meta http-equiv="refresh">`` redirect, a
    ``<link rel="canonical">`` pointing at the same URL, and a visible fallback
    link for clients that do not follow the refresh.

    Args:
        target_url: Absolute URL the visitor should be sent to.
        old_relative_path: Path of the old page relative to the old site root.
            Backslashes are converted to '/' and the result is shown with a
            single leading '/'.
        new_base_url: Currently unused; kept so existing callers that pass
            three arguments keep working.

    Returns:
        The complete HTML document as a string.
    """
    # Local import so this function works even if the file's import block is
    # left unchanged.
    from html import escape

    # Clean up the path for display in the <small> tag.
    display_path = '/' + old_relative_path.replace('\\', '/').lstrip('/')

    # Both values end up inside HTML attributes/text. Escape them so a URL with
    # '&' or '"' (query strings) or a file name with '<' cannot break or inject
    # markup into the generated page.
    safe_url = escape(target_url, quote=True)
    safe_path = escape(display_path, quote=True)

    return f"""<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>Page Moved Permanently</title>
<!-- THE AUTOMATIC CLIENT-SIDE REDIRECT (0 seconds delay) -->
<meta http-equiv="refresh" content="0; url={safe_url}">
<!-- THE SEO SIGNAL: Tells search engines the new permanent location (301 equivalent) -->
<link rel="canonical" href="{safe_url}">
</head>
<body>
<div style="text-align: center; padding: 50px;">
<h1>Redirecting...</h1>
<p>This page has permanently moved. If you are not automatically redirected,
<a href="{safe_url}">click here</a> to go to the new location.</p>
<small>Old path: {safe_path}</small>
</div>
</body>
</html>
"""
# --- SCRIPT LOGIC ---
def run_redirect_generator():
    """Scan the old site for HTML files and write redirect stubs into the new site.

    For every ``*.html`` file found (recursively) under ``OLD_SITE_DIR``:

    * if a file already exists at the same relative path under
      ``NEW_SITE_DIR``, it is left untouched and no redirect is generated;
    * otherwise a redirect page pointing at ``DEFAULT_TARGET_URL`` is written
      at that relative path, creating parent directories as needed.

    Progress is reported on stdout. When at least one redirect was created, a
    review table is written via ``generate_summary_file``. Returns ``None``;
    if the old site directory is missing, an error is printed and nothing is
    written.
    """
    old_dir_path = Path(OLD_SITE_DIR)
    new_dir_path = Path(NEW_SITE_DIR)

    # is_dir() rather than exists(): a regular file at this path cannot be
    # scanned and should be reported as a configuration error too.
    if not old_dir_path.is_dir():
        print(f"ERROR: Old site directory not found at '{old_dir_path}'. Please check OLD_SITE_DIR setting.")
        return

    if not new_dir_path.exists():
        print(f"Creating new site directory at '{new_dir_path}'...")
        new_dir_path.mkdir(parents=True, exist_ok=True)

    print(f"Scanning for HTML files in: {old_dir_path}")
    print(f"Generating redirects into: {new_dir_path}")
    print("-" * 50)

    base_url = NEW_BASE_URL.rstrip('/')  # loop-invariant, computed once
    redirect_list = []

    # Materialise and sort the matches before writing anything: this keeps the
    # log and the summary in a stable order across runs, and avoids walking a
    # directory tree lazily while the loop may be creating files in it.
    old_html_files = sorted(old_dir_path.glob('**/*.html'))

    for old_file_path in old_html_files:
        # Path relative to the old site root; reused as the location in the
        # new site and (with forward slashes) as the URL path.
        relative_path = old_file_path.relative_to(old_dir_path)
        target_new_site_path = new_dir_path / relative_path
        url_path = str(relative_path).replace('\\', '/')

        # 1. CHECK FOR OVERWRITE (Does the new site already have a file here?)
        if target_new_site_path.exists():
            print(f"SKIP: Found new file at '{relative_path}'. No redirect needed.")
            continue

        # 2. GENERATE REDIRECT (File does not exist in the new site)
        # Every generated page points at the default target for now; the
        # summary flags these entries for manual review.
        final_target_url = DEFAULT_TARGET_URL
        is_default_redirect = True

        # Ensure the parent directories exist for the redirect file.
        target_new_site_path.parent.mkdir(parents=True, exist_ok=True)

        redirect_content = generate_redirect_html(
            final_target_url,
            str(relative_path),
            NEW_BASE_URL,
        )
        with open(target_new_site_path, 'w', encoding='utf-8') as f:
            f.write(redirect_content)
        print(f"CREATED: Redirect for old path '{relative_path}' to default URL.")

        # Record for the summary list.
        redirect_list.append({
            'old_path': '/' + url_path,
            'target_url': final_target_url,
            # Where the page would live if it kept the same path on the new site.
            'assumed_url': f"{base_url}/{url_path}",
            'is_default': is_default_redirect,
        })

    print("-" * 50)
    # One entry is appended per created redirect, so the list length is the count.
    print(f"Generation complete. Total redirects created: {len(redirect_list)}")

    # Generate the summary file.
    if redirect_list:
        generate_summary_file(redirect_list)
        print("Generated 'redirect_summary.md' for manual review.")
    else:
        print("No redirects created.")
    print("These new files and the summary must be committed and pushed to your GitHub Pages repository.")
def generate_summary_file(redirect_list):
    """Write ``redirect_summary.md`` into ``NEW_SITE_DIR`` for manual review.

    Args:
        redirect_list: List of dicts, one per generated redirect, each with
            the keys ``'old_path'``, ``'assumed_url'``, ``'target_url'`` and
            ``'is_default'``.

    The file contains a short explanation followed by a markdown table with
    one row per entry; entries whose ``'is_default'`` is truthy are marked as
    needing review.
    """
    out_path = Path(NEW_SITE_DIR) / 'redirect_summary.md'

    # Heading, explanatory text and the table header, in document order.
    parts = [
        f"# Redirect Summary ({len(redirect_list)} Generated)\n\n",
        "This file lists all the HTML files from the old site that were missing in the new site. \n",
        "They have been automatically redirected to the **Default Target URL** configured in the script.\n\n",
        "**Action Required:** You must review the `Old Path` and find the correct corresponding `New Target URL` for each entry below and update the generated HTML files manually.\n\n",
        "| Status | Old Path | Assumed New Path | Current Redirect Target |\n",
        "| :--- | :--- | :--- | :--- |\n",
    ]

    # One table row per redirect.
    for item in redirect_list:
        if item['is_default']:
            label = "**DEFAULT (REVIEW)**"
        else:
            label = "OK"
        parts.append(
            f"| {label} "
            f"| `{item['old_path']}` "
            f"| `{item['assumed_url']}` "
            f"| `{item['target_url']}` |\n"
        )

    with open(out_path, 'w', encoding='utf-8') as handle:
        handle.write("".join(parts))
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    run_redirect_generator()
Metadata
Metadata
Assignees
Labels
No labels