experiment: run hammers on Mathlib

kim-em · kim-em · commit 06ff22e2a201 · 2025-12-04T03:20:07.000Z
diff --git a/Mathlib/Tactic/TacticAnalysis/Declarations.lean b/Mathlib/Tactic/TacticAnalysis/Declarations.lean
@@ -349,7 +349,9 @@ register_option linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions : Bool :=
 
 @[tacticAnalysis linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions,
    inherit_doc linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions]
-def tryAtEachStepSimpAllSuggestions := tryAtEachStep fun _ _ => `(tactic| simp_all? +suggestions)
+-- The `try` works around an error raised at `def cast` in `Logic/Equiv/Defs.lean`.
+-- TODO: the cause of that error is not yet understood.
+def tryAtEachStepSimpAllSuggestions := tryAtEachStep fun _ _ => `(tactic| try simp_all? +suggestions)
 
 -- TODO: add compatibility with `rintro` and `intros`
 /-- Suggest merging two adjacent `intro` tactics which don't pattern match. -/
diff --git a/lakefile.lean b/lakefile.lean
@@ -34,6 +34,12 @@ abbrev mathlibOnlyLinters : Array LeanOption := #[
   ⟨`linter.allScriptsDocumented, true⟩,
   ⟨`linter.pythonStyle, true⟩,
   ⟨`linter.style.longFile, .ofNat 1500⟩,
+  -- ⟨`linter.tacticAnalysis.tryAtEachStepAesop, true⟩,
+  ⟨`linter.tacticAnalysis.tryAtEachStepSimpAll, true⟩,
+  ⟨`linter.tacticAnalysis.tryAtEachStepGrind, true⟩,
+  ⟨`linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions, true⟩,
+  ⟨`linter.tacticAnalysis.tryAtEachStepGrindSuggestions, true⟩,
+  ⟨`linter.tacticAnalysis.tryAtEachStepGrind.fraction, .ofNat 10⟩,
   -- ⟨`linter.nightlyRegressionSet, true⟩,
   -- `latest_import.yml` uses this comment: if you edit it, make sure that the workflow still works
 ]
diff --git a/scripts/README.md b/scripts/README.md
@@ -87,6 +87,9 @@ to learn about it as well!
   Generates `unused.md` containing a markdown table showing the unused imports,
   and suggests `lake exe graph` commands to visualize the largest "rectangles" of unused imports.
 
+**Analyzing hammer tactic suggestions**
+- `analyze_hammer_suggestions.py` analyzes which tactics can replace existing tactics at each location, with and without `+suggestions`. By default it analyzes `simp_all` and `grind` on all of Mathlib. Use `--aesop` and `--canonical` to enable additional tactics, or `--no-<tactic>` to disable defaults. Use `--raw` to print `location:tactic` pairs instead of summary tables.
+
 **CI workflow**
 - `lake-build-with-retry.sh`
   Runs `lake build` on a target until `lake build --no-build` succeeds. Used in the main build workflows.
diff --git a/scripts/analyze_hammer_suggestions.py b/scripts/analyze_hammer_suggestions.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python3
+"""
+Analyze hammer suggestions from Lean build output.
+
+This script processes the output of `lake build --no-build` to extract and analyze
+tactic replacement suggestions generated by Lean's hammer tactics.
+
+Usage:
+    # Analyze all of Mathlib with default tactics (simp_all and grind)
+    ./scripts/analyze_hammer_suggestions.py
+
+    # Analyze a specific module
+    ./scripts/analyze_hammer_suggestions.py Mathlib.Data.List.Basic
+
+    # Enable specific tactics (use --no-<tactic> to disable)
+    ./scripts/analyze_hammer_suggestions.py --aesop --canonical
+
+    # Disable default tactics
+    ./scripts/analyze_hammer_suggestions.py --no-simp-all --no-grind
+
+    # Output raw location:tactic pairs for further processing
+    ./scripts/analyze_hammer_suggestions.py --raw
+
+By default, only simp_all and grind are enabled. Use --aesop and --canonical to
+enable additional tactics. The script analyzes which tactics can replace existing
+tactics at each location, both with and without the +suggestions flag.
+
+Output includes:
+  - Three categories based on +suggestions impact:
+    * Regressions: worked without +suggestions but failed with +suggestions
+    * Improvements: failed without +suggestions but worked with +suggestions
+    * Neutral: same result with/without +suggestions
+  - For each category, a binary table showing tactic combinations
+  - Grid format showing binary patterns (1=tactic works, 0=doesn't)
+
+Normalization:
+  - Removes 'try ' prefix from tactics
+  - Removes '?' modifier (e.g., 'simp_all?' -> 'simp_all')
+  - Removes dagger symbol '✝' from tactic names
+"""
+
+import argparse
+import re
+import subprocess
+from collections import defaultdict
+from itertools import product
+
+
+def normalize_tactic(tactic):
+    """Return `tactic` without '✝', a leading 'try ', or the '?' modifier (flags are kept)."""
+    # Remove dagger symbol
+    tactic = tactic.replace('✝', '')
+    tactic = tactic.strip()
+
+    # Remove 'try ' prefix if present
+    if tactic.startswith('try '):
+        tactic = tactic[4:].strip()
+
+    # Remove a '?' modifier that is followed by a flag, keeping the flag itself,
+    # e.g., "simp_all? +suggestions" -> "simp_all +suggestions"
+    tactic = tactic.replace('? +', ' +')
+    # Remove trailing '?' if present
+    if tactic.endswith('?'):
+        tactic = tactic[:-1].strip()
+
+    return tactic
+
+
+def extract_suggestions(target):
+    """Run `lake build --no-build` on `target`; map each location to its suggested tactics."""
+    # NOTE(review): the exit status is ignored; a failed run is not reported.
+    result = subprocess.run(
+        ['lake', 'build', '--no-build', target],
+        capture_output=True,
+        text=True
+    )
+    output = result.stdout + result.stderr
+
+    # Match info messages of the form "... can be replaced with `tac`". DOTALL lets the
+    # text before that phrase span lines; the lookahead stops the lazy gap at the next
+    # line starting with a log-level prefix, so one message cannot absorb a later one.
+    gap = r'(?:(?!\n(?:trace|info|warning|error): ).)*?'
+    pattern = r'info: ([^:\n]+):(\d+):(\d+):' + gap + r' can be replaced with `([^`]+)`'
+    matches = re.findall(pattern, output, re.DOTALL)
+
+    # Group the normalized suggestions by "file:row:col" location
+    locations = defaultdict(set)
+    for filepath, row, col, replacement in matches:
+        locations[f"{filepath}:{row}:{col}"].add(normalize_tactic(replacement))
+    return locations
+
+
+def output_raw(locations):
+    """Print one `location:tactic` line per suggestion, sorted by location, then tactic."""
+    for location, tactics in sorted(locations.items()):
+        for tactic in sorted(tactics):
+            print(f"{location}:{tactic}")
+
+
+def check_tactic_support(locations, tactics):
+    """
+    For each location, determine which of the given tactics are suggested there.
+    Returns a dict mapping location -> frozenset of supported tactics.
+    """
+    result = {}
+    for location, suggestions in locations.items():
+        supported = set()
+        for tactic in tactics:
+            if tactic in suggestions:
+                supported.add(tactic)
+        result[location] = frozenset(supported)
+
+    return result
+
+
+def count_subsets(tactic_support):
+    """
+    Count how many locations share each frozenset of supported tactics.
+    Returns a defaultdict(int) mapping frozenset -> count.
+    """
+    counts = defaultdict(int)
+    for supported in tactic_support.values():
+        counts[supported] += 1
+
+    return counts
+
+
+def format_subset(subset, tactics):
+    """Return one '1'/'0' character per entry of `tactics`: '1' when it is in `subset`."""
+    return ''.join('1' if t in subset else '0' for t in tactics)
+
+
+def print_table(counts, tactics, title):
+    """Print `title`, a legend, and the count for every 0/1 combination of `tactics`."""
+    print(f"\n{title}")
+    print("=" * len(title))
+
+    # Generate all possible subsets
+    all_subsets = []
+    for bits in product([0, 1], repeat=len(tactics)):
+        subset = frozenset(t for t, b in zip(tactics, bits) if b)
+        all_subsets.append(subset)
+
+    # Sort by binary pattern (no visible effect: the dict below is only read by key)
+    all_subsets.sort(key=lambda s: format_subset(s, tactics), reverse=True)
+
+    # Create a mapping from binary pattern to count
+    pattern_counts = {}
+    for subset in all_subsets:
+        pattern = format_subset(subset, tactics)
+        pattern_counts[pattern] = counts.get(subset, 0)
+
+    # Print the legend: the tactic names, then the first letter of each as column header
+    num_tactics = len(tactics)
+    num_combinations = 2 ** num_tactics
+    print(f"\nTactics: {', '.join(tactics)}")
+    print("Binary pattern: " + "".join(f"{t[0]}" for t in tactics))
+    print()
+
+    # Print one line per pattern, with a blank line between groups of 4 for readability
+    for i in range(0, num_combinations, 4):
+        for j in range(min(4, num_combinations - i)):
+            combo_idx = num_combinations - 1 - (i + j)  # Count down from all 1s
+            bits = format(combo_idx, f'0{num_tactics}b')
+            count = pattern_counts.get(bits, 0)
+            subset_names = [tactics[k] for k in range(num_tactics) if bits[k] == '1']
+            subset_str = '{' + ','.join(subset_names) + '}' if subset_names else '{}'
+            print(f"{bits} {count:>6}  {subset_str}")
+        if i + 4 < num_combinations:
+            print()
+
+
+def categorize_locations(locations, base_tactics):
+    """
+    Categorize locations into three groups based on +suggestions impact:
+    - regression: base works but +suggestions doesn't for at least one tactic
+    - improvement: +suggestions works but base doesn't for at least one tactic (and no regressions)
+    - neutral: same result for all tactics with/without +suggestions
+    Returns three dicts (in that order) mapping location -> frozenset of working tactics:
+    base names for regression/neutral, '<tactic> +suggestions' names for improvement.
+    """
+    regression_locs = {}
+    improvement_locs = {}
+    neutral_locs = {}
+
+    for location, suggestions in locations.items():
+        has_regression = False
+        has_improvement = False
+
+        base_working = set()
+        sugg_working = set()
+
+        for tactic in base_tactics:
+            base_works = tactic in suggestions
+            sugg_works = f'{tactic} +suggestions' in suggestions
+
+            if base_works:
+                base_working.add(tactic)
+            if sugg_works:
+                sugg_working.add(f'{tactic} +suggestions')
+
+            if base_works and not sugg_works:
+                has_regression = True
+            if sugg_works and not base_works:
+                has_improvement = True
+
+        if has_regression:
+            regression_locs[location] = frozenset(base_working)
+        elif has_improvement:
+            improvement_locs[location] = frozenset(sugg_working)
+        else:
+            neutral_locs[location] = frozenset(base_working)
+
+    return regression_locs, improvement_locs, neutral_locs
+
+
+def output_analysis(locations, enabled_tactics):
+    """Print the category totals, then one table per non-empty category of locations."""
+    print(f"Found {len(locations)} unique locations with suggestions")
+
+    # Keep only the enabled tactics, in this fixed display order
+    base_tactics = [t for t in ['simp_all', 'aesop', 'canonical', 'grind'] if enabled_tactics[t]]
+
+    if not base_tactics:
+        print("\nNo tactics enabled. Use --simp-all, --aesop, --canonical, or --grind to enable tactics.")
+        return
+
+    print(f"\nAnalyzing tactics: {', '.join(base_tactics)}")
+
+    # Categorize locations
+    regression_locs, improvement_locs, neutral_locs = categorize_locations(locations, base_tactics)
+
+    print(f"\nRegressions: {len(regression_locs)} locations")
+    print(f"Improvements: {len(improvement_locs)} locations")
+    print(f"Neutral: {len(neutral_locs)} locations")
+
+    # Create +suggestions tactic names (the names stored in the improvement sets)
+    suggestions_tactics = [f'{t} +suggestions' for t in base_tactics]
+
+    # Print regression table (base tactics that worked)
+    if regression_locs:
+        regression_counts = count_subsets(regression_locs)
+        print_table(regression_counts, base_tactics, "REGRESSIONS (worked without +suggestions, failed with +suggestions)")
+
+    # Print improvement table (+suggestions tactics that worked)
+    if improvement_locs:
+        improvement_counts = count_subsets(improvement_locs)
+        print_table(improvement_counts, suggestions_tactics, "IMPROVEMENTS (failed without +suggestions, worked with +suggestions)")
+
+    # Print neutral table (base tactics, since same as +suggestions)
+    if neutral_locs:
+        neutral_counts = count_subsets(neutral_locs)
+        print_table(neutral_counts, base_tactics, "NEUTRAL (same result with/without +suggestions)")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Analyze hammer suggestions from Lean build output'
+    )
+    parser.add_argument(
+        'target',
+        nargs='?',
+        default='Mathlib',
+        help='Build target to analyze (default: Mathlib)'
+    )
+    parser.add_argument(
+        '--raw',
+        action='store_true',
+        help='Output raw location:tactic pairs instead of analysis tables'
+    )
+
+    # Tactic flags; BooleanOptionalAction also generates the matching --no-<flag> form
+    parser.add_argument(
+        '--simp-all',
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help='Include simp_all in analysis (default: enabled)'
+    )
+    parser.add_argument(
+        '--aesop',
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help='Include aesop in analysis (default: disabled)'
+    )
+    parser.add_argument(
+        '--canonical',
+        action=argparse.BooleanOptionalAction,
+        default=False,
+        help='Include canonical in analysis (default: disabled)'
+    )
+    parser.add_argument(
+        '--grind',
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help='Include grind in analysis (default: enabled)'
+    )
+
+    args = parser.parse_args()
+
+    # Build enabled tactics dict (normalize argument names to tactic names)
+    enabled_tactics = {
+        'simp_all': args.simp_all,
+        'aesop': args.aesop,
+        'canonical': args.canonical,
+        'grind': args.grind
+    }
+
+    # Extract all suggestions from the output of `lake build --no-build <target>`
+    locations = extract_suggestions(args.target)
+
+    if args.raw:
+        output_raw(locations)
+    else:
+        print(f"Analyzing hammer suggestions for {args.target}...")
+        print("=" * 70)
+        print()
+        output_analysis(locations, enabled_tactics)
+
+
+if __name__ == '__main__':
+    main()