Skip to content

Full redirection for old site URLs #25

@wspear

Description

@wspear

There are already some redirects in place. Ideally, each redirect page should include tags (e.g. `<link rel="canonical">` and a meta refresh) so that crawlers update their indexes when they hit the old URLs. We may also want to update the new site's 404.html to handle any missed cases.

An automatic redirect generator script like the one below could take care of this quickly, leaving only a little manual editing to set the correct redirect target for each page. Note: the following code is AI-generated and untested.

import os
from pathlib import Path

# --- CONFIGURATION ---

# Root directory holding the old site's files
# (for example 'C:/Users/You/Desktop/OldWebsite').
OLD_SITE_DIR = 'old_site_content'

# Root directory of the new site (for example 'C:/Users/You/Desktop/NewWebsite').
# Redirect pages are written into this tree.
NEW_SITE_DIR = 'new_site_content'

# Base URL of the *new* site (for example 'https://mynewdomain.github.io').
NEW_BASE_URL = 'https://mynewdomain.github.io'

# Fallback destination for every old page that has no counterpart on the new
# site: the base URL with exactly one trailing slash (normally the homepage).
DEFAULT_TARGET_URL = f"{NEW_BASE_URL.rstrip('/')}/"

# --- HTML TEMPLATE ---

def generate_redirect_html(target_url, old_relative_path, new_base_url):
    """Return the HTML source of a static redirect page.

    The page carries a zero-delay meta refresh, a canonical link pointing at
    the new location, and a visible fallback link for clients that do not
    follow the refresh.

    Args:
        target_url: URL the page redirects to.
        old_relative_path: Path of the old page relative to the old site
            root; backslashes are converted to forward slashes for display.
        new_base_url: Accepted for interface compatibility; not used when
            building the page.

    Returns:
        The complete HTML document as a string.
    """
    # Imported locally so the function does not rely on a module-level import.
    import html

    # Normalise to a URL-style path with a single leading slash for display.
    display_path = '/' + old_relative_path.replace('\\', '/').lstrip('/')

    # Both values come from configuration and file names, which may contain
    # characters such as '&', '"' or '<'. Escape them so the attributes and
    # text nodes below stay well-formed.
    safe_url = html.escape(target_url, quote=True)
    safe_path = html.escape(display_path, quote=True)

    return f"""<!DOCTYPE html>
<html lang="en-US">
<head>
    <meta charset="utf-8">
    <title>Page Moved Permanently</title>
    <!-- THE AUTOMATIC CLIENT-SIDE REDIRECT (0 seconds delay) -->
    <meta http-equiv="refresh" content="0; url={safe_url}">
    
    <!-- THE SEO SIGNAL: Tells search engines the new permanent location (301 equivalent) -->
    <link rel="canonical" href="{safe_url}">
</head>
<body>
    <div style="text-align: center; padding: 50px;">
        <h1>Redirecting...</h1>
        <p>This page has permanently moved. If you are not automatically redirected, 
           <a href="{safe_url}">click here</a> to go to the new location.</p>
        <small>Old path: {safe_path}</small>
    </div>
</body>
</html>
"""

# --- SCRIPT LOGIC ---

def run_redirect_generator():
    """Create redirect pages for old-site HTML files missing from the new site.

    Every ``*.html`` file found recursively under ``OLD_SITE_DIR`` is mapped
    to the same relative path under ``NEW_SITE_DIR``. If a file already
    exists there it is left alone; otherwise a redirect page pointing at
    ``DEFAULT_TARGET_URL`` is written. Progress is printed to stdout, and a
    Markdown summary is generated when at least one redirect was created.

    Returns early (after printing an error) if ``OLD_SITE_DIR`` is missing.
    """
    source_root = Path(OLD_SITE_DIR)
    output_root = Path(NEW_SITE_DIR)
    divider = "-" * 50

    # Nothing can be scanned without the old site tree.
    if not source_root.exists():
        print(f"ERROR: Old site directory not found at '{source_root}'. Please check OLD_SITE_DIR setting.")
        return

    if not output_root.exists():
        print(f"Creating new site directory at '{output_root}'...")
        output_root.mkdir(parents=True, exist_ok=True)

    print(f"Scanning for HTML files in: {source_root}")
    print(f"Generating redirects into: {output_root}")
    print(divider)

    # One record per redirect written; its length doubles as the counter.
    created = []

    for html_file in source_root.glob('**/*.html'):
        rel = html_file.relative_to(source_root)
        destination = output_root / rel

        # A file already present at this path in the new site is never
        # overwritten.
        if destination.exists():
            print(f"SKIP: Found new file at '{rel}'. No redirect needed.")
            continue

        # URL-style form of the relative path (forward slashes only).
        url_path = str(rel).replace('\\', '/')

        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(
            generate_redirect_html(DEFAULT_TARGET_URL, str(rel), NEW_BASE_URL),
            encoding='utf-8',
        )
        print(f"CREATED: Redirect for old path '{rel}' to default URL.")

        # Every generated redirect currently targets the default URL, so it
        # is flagged for manual review in the summary.
        created.append({
            'old_path': '/' + url_path,
            'target_url': DEFAULT_TARGET_URL,
            'assumed_url': f"{NEW_BASE_URL.rstrip('/')}/{url_path}",
            'is_default': True,
        })

    print(divider)
    print(f"Generation complete. Total redirects created: {len(created)}")

    if not created:
        print("No redirects created.")
    else:
        generate_summary_file(created)
        print("Generated 'redirect_summary.md' for manual review.")

    print("These new files and the summary must be committed and pushed to your GitHub Pages repository.")

def generate_summary_file(redirect_list):
    """Write ``redirect_summary.md`` into ``NEW_SITE_DIR``.

    The file holds a short explanation followed by a Markdown table with one
    row per entry of ``redirect_list``. Each entry is a dict providing the
    keys ``old_path``, ``assumed_url``, ``target_url`` and ``is_default``.
    """
    destination = Path(NEW_SITE_DIR) / 'redirect_summary.md'

    # Fixed preamble plus the table header.
    preamble = (
        f"# Redirect Summary ({len(redirect_list)} Generated)\n\n",
        "This file lists all the HTML files from the old site that were missing in the new site. \n",
        "They have been automatically redirected to the **Default Target URL** configured in the script.\n\n",
        "**Action Required:** You must review the `Old Path` and find the correct corresponding `New Target URL` for each entry below and update the generated HTML files manually.\n\n",
        "| Status | Old Path | Assumed New Path | Current Redirect Target |\n",
        "| :--- | :--- | :--- | :--- |\n",
    )

    table_rows = []
    for item in redirect_list:
        # Entries still pointing at the default URL are flagged for review.
        if item['is_default']:
            label = "**DEFAULT (REVIEW)**"
        else:
            label = "OK"
        table_rows.append(
            f"| {label} | `{item['old_path']}` | `{item['assumed_url']}` | `{item['target_url']}` |\n"
        )

    destination.write_text("".join(preamble) + "".join(table_rows), encoding='utf-8')


# Run the generator only when this file is executed as a script, so that
# importing the module does not create or modify any files.
if __name__ == '__main__':
    run_redirect_generator()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions