Skip to content

Conversation

@ykla
Copy link
Contributor

@ykla ykla commented Dec 5, 2025

Signed-off-by: ykla [email protected]
Sponsored by: Chinese FreeBSD Community

Result: Found 1 ordering error(s):
  - Date 20211118 (line 127) is newer than 20211110 (line 126)

Since the entries are added entirely by hand, mistakes like this are understandable. To prevent such problems from recurring, I suggest adding a similar check to the CI.

#!/usr/bin/env python3
"""
Check whether dates like "20251105:" in the text are sorted in reverse chronological
order, i.e., the newest date at the top and the oldest date at the bottom. Adjacent
entries that carry the same date are accepted.
"""

import sys
import re
import os
from datetime import datetime

def extract_dates_from_text(text):
    """Extract date stamps such as "20251105:" from text, in order of appearance.

    Recognized forms are ``YYYYMMDD:``, ``YYYY-MM-DD:`` and ``YYYY/MM/DD:``.
    Only the shape is checked here; whether the digits form a real calendar
    date is not validated by this function.

    Args:
        text: The text to scan.

    Returns:
        A list of the matched date strings (without the trailing colon),
        ordered by their position in ``text``.
    """
    # The (?<!\d) lookbehind keeps the tail of a longer digit run (for
    # example "1234567890:") from being mistaken for a date stamp.
    # finditer scans left to right, so matches already come out in
    # appearance order and no separate sort is needed.
    date_pattern = r'(?<!\d)(\d{8}|\d{4}-\d{2}-\d{2}|\d{4}/\d{2}/\d{2}):'
    return [match.group(1) for match in re.finditer(date_pattern, text)]

def parse_date(date_str):
    """Convert a date string to a datetime, or return None if no layout fits.

    The layouts tried, in order, are compact (20251105), dashed
    (2025-11-05) and slashed (2025/11/05).
    """
    for layout in ('%Y%m%d', '%Y-%m-%d', '%Y/%m/%d'):
        try:
            parsed = datetime.strptime(date_str, layout)
        except ValueError:
            # This layout does not fit; fall through to the next one.
            continue
        return parsed

    return None

def check_date_order(dates):
    """Check whether dates are sorted in reverse chronological order (newest first).

    Args:
        dates: Date strings in their order of appearance.

    Returns:
        A ``(success, message)`` tuple. ``success`` is True when fewer than
        two dates are given or when no date is newer than the one before it
        (equal neighbours are accepted). It is False when any date cannot be
        parsed or when at least one ordering violation is found; ``message``
        then describes the problem.
    """
    if len(dates) < 2:
        return True, "Only 1 or 0 dates found, no ordering check needed"

    parsed_dates = [(date_str, parse_date(date_str)) for date_str in dates]

    # Error if any date cannot be parsed
    invalid_dates = [date_str for date_str, date_obj in parsed_dates if date_obj is None]
    if invalid_dates:
        return False, f"Failed to parse the following date formats: {', '.join(invalid_dates)}"

    # Compare each date with its predecessor. The numbers reported are
    # 1-based positions within ``dates`` -- this function never sees source
    # line numbers, so they are labelled "entry" rather than "line".
    violations = []
    for position in range(1, len(parsed_dates)):
        prev_str, prev_date = parsed_dates[position - 1]
        curr_str, curr_date = parsed_dates[position]

        # If current date is newer -> ordering error
        if curr_date > prev_date:
            violations.append(
                f"  - Date {curr_str} (entry {position + 1}) is newer than "
                f"{prev_str} (entry {position})\n"
            )

    if not violations:
        return True, f"Check passed: all {len(parsed_dates)} dates are in reverse chronological order (newest first)"

    return False, f"Found {len(violations)} ordering error(s):\n" + "".join(violations)

def analyze_text_file(filepath):
    """Read a UTF-8 text file and check the ordering of the dates it contains.

    Returns a ``(success, message)`` tuple. A file that cannot be read gives
    ``success`` False with an error message; a file with no recognizable
    dates gives ``success`` True. Otherwise the result comes from
    ``check_date_order``.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            content = handle.read()
    except FileNotFoundError:
        return False, f"Error: file '{filepath}' not found"
    except Exception as exc:
        return False, f"Error reading file: {exc}"

    found = extract_dates_from_text(content)
    if found:
        # At least one date stamp is present: validate the ordering.
        return check_date_order(found)

    return True, "No dates like '20251105:' found"

def _read_interactive_text():
    """Ask the user for an input method and return the text to check.

    Returns the collected text, or None when the chosen option is invalid or
    the requested file cannot be read (an error is printed in that case).
    """
    print("\nChoose input method:")
    print("1. Read from file")
    print("2. Enter text manually")
    print("3. Read from standard input")

    choice = input("\nSelect (1/2/3): ").strip()

    if choice == '1':
        # Read from file. Opening directly (instead of testing for existence
        # first) avoids a window where the file vanishes between the check
        # and the open.
        filepath = input("Enter file path: ").strip()
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                text = f.read()
        except FileNotFoundError:
            print(f"Error: file '{filepath}' not found")
            return None
        except (OSError, ValueError) as e:
            # OSError: permissions, directories, etc.
            # ValueError: undecodable bytes or an invalid path string.
            print(f"Error reading file: {e}")
            return None

        print(f"\nLoaded content from '{filepath}'")
        return text

    if choice == '2':
        # Manual input: stop at a blank line, a literal "EOF", or end of input.
        print("\nEnter text (type EOF or blank line to finish):")
        lines = []
        try:
            while True:
                line = input()
                if line.strip() in ('EOF', ''):
                    break
                lines.append(line)
        except EOFError:
            pass

        print(f"\nEntered {len(lines)} line(s)")
        return '\n'.join(lines)

    if choice == '3':
        # Read from stdin
        print("\nReading text from stdin (Ctrl+D to finish):")
        text = sys.stdin.read()
        print(f"\nRead {len(text)} characters from stdin")
        return text

    print("Invalid option")
    return None


def analyze_interactive():
    """Run the checker interactively and print the findings.

    Prompts for an input method, lists the dates found, prints the ordering
    result and, when the check fails, prints the dates that could be parsed
    in the suggested newest-first order.

    Returns:
        True when the ordering check passes; False when it fails or when no
        text could be obtained (invalid option or unreadable file).
    """
    print("Date Ordering Checker")
    print("=" * 50)

    text = _read_interactive_text()
    if text is None:
        # Always return a bool so callers can turn the result into an exit
        # status without special-casing None.
        return False

    # Extract dates
    dates = extract_dates_from_text(text)

    print(f"\nFound {len(dates)} date(s):")
    for number, date in enumerate(dates, start=1):
        print(f"  {number}. {date}:")

    # Check ordering
    success, message = check_date_order(dates)

    print(f"\n{'='*50}")
    print("Result:")
    print(message)

    if not success and dates:
        print("\nSuggested correct order (reverse chronological):")
        # Only dates that parse can be ranked; unparseable ones are left out.
        parsed = [(date_str, parse_date(date_str)) for date_str in dates]
        ranked = [pair for pair in parsed if pair[1] is not None]

        # Sort newest -> oldest
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        for number, (date_str, _) in enumerate(ranked, start=1):
            print(f"  {number}. {date_str}:")

    return success

def main():
    """Command-line entry point.

    With no arguments, runs interactive mode. With one argument, treats it
    as a file path and checks that file. Any other argument count prints a
    usage summary.

    The function always ends through ``sys.exit``: status 0 when the check
    passes, status 1 when it fails, the file is missing, or the arguments
    are wrong.
    """
    if len(sys.argv) == 1:
        # Interactive mode. Propagate the outcome so a failed check (or any
        # falsy result, such as an aborted session) yields a non-zero exit
        # status instead of always reporting success.
        success = analyze_interactive()
        sys.exit(0 if success else 1)
    elif len(sys.argv) == 2:
        # File mode
        filepath = sys.argv[1]

        if not os.path.exists(filepath):
            print(f"Error: file '{filepath}' not found")
            sys.exit(1)

        success, message = analyze_text_file(filepath)

        print(f"File: {filepath}")
        print(f"Result: {message}")

        sys.exit(0 if success else 1)
    else:
        print("Usage:")
        print("  python script.py               # interactive mode")
        print("  python script.py <filepath>    # check file")
        sys.exit(1)


if __name__ == "__main__":
    main()

Signed-off-by: ykla [email protected]
Sponsored by: Chinese FreeBSD Community
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant