Skip to content

Commit 06ff22e

Browse files
committed
experiment: run hammers on Mathlib
1 parent 2100ee6 commit 06ff22e

File tree

4 files changed

+333
-1
lines changed

4 files changed

+333
-1
lines changed

Mathlib/Tactic/TacticAnalysis/Declarations.lean

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,9 @@ register_option linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions : Bool :=
349349

350350
@[tacticAnalysis linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions,
351351
inherit_doc linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions]
352-
def tryAtEachStepSimpAllSuggestions := tryAtEachStep fun _ _ => `(tactic| simp_all? +suggestions)
352+
-- This `try` is needed or we get an error
353+
-- in `Logic/Equiv/Defs.lean` at `def cast` that I don't understand.
354+
def tryAtEachStepSimpAllSuggestions := tryAtEachStep fun _ _ => `(tactic| try simp_all? +suggestions)
353355

354356
-- TODO: add compatibility with `rintro` and `intros`
355357
/-- Suggest merging two adjacent `intro` tactics which don't pattern match. -/

lakefile.lean

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ abbrev mathlibOnlyLinters : Array LeanOption := #[
3434
⟨`linter.allScriptsDocumented, true⟩,
3535
⟨`linter.pythonStyle, true⟩,
3636
⟨`linter.style.longFile, .ofNat 1500⟩,
37+
-- ⟨`linter.tacticAnalysis.tryAtEachStepAesop, true⟩,
38+
⟨`linter.tacticAnalysis.tryAtEachStepSimpAll, true⟩,
39+
⟨`linter.tacticAnalysis.tryAtEachStepGrind, true⟩,
40+
⟨`linter.tacticAnalysis.tryAtEachStepSimpAllSuggestions, true⟩,
41+
⟨`linter.tacticAnalysis.tryAtEachStepGrindSuggestions, true⟩,
42+
⟨`linter.tacticAnalysis.tryAtEachStepGrind.fraction, .ofNat 10⟩,
3743
-- ⟨`linter.nightlyRegressionSet, true⟩,
3844
-- `latest_import.yml` uses this comment: if you edit it, make sure that the workflow still works
3945
]

scripts/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ to learn about it as well!
8787
Generates `unused.md` containing a markdown table showing the unused imports,
8888
and suggests `lake exe graph` commands to visualize the largest "rectangles" of unused imports.
8989

90+
**Analyzing hammer tactic suggestions**
91+
- `analyze_hammer_suggestions.py` analyzes which tactics can replace existing tactics at each location, with and without +suggestions. By default analyzes simp_all and grind on all of Mathlib. Use `--aesop` and `--canonical` to enable additional tactics, or `--no-<tactic>` to disable defaults. Use `--raw` for location:tactic pairs instead of summary tables.
92+
9093
**CI workflow**
9194
- `lake-build-with-retry.sh`
9295
Runs `lake build` on a target until `lake build --no-build` succeeds. Used in the main build workflows.
Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Analyze hammer suggestions from Lean build output.
4+
5+
This script processes the output of `lake build --no-build` to extract and analyze
6+
tactic replacement suggestions generated by Lean's hammer tactics.
7+
8+
Usage:
9+
# Analyze all of Mathlib with default tactics (simp_all and grind)
10+
./scripts/analyze_hammer_suggestions.py
11+
12+
# Analyze a specific module
13+
./scripts/analyze_hammer_suggestions.py Mathlib.Data.List.Basic
14+
15+
# Enable specific tactics (use --no-<tactic> to disable)
16+
./scripts/analyze_hammer_suggestions.py --aesop --canonical
17+
18+
# Disable default tactics
19+
./scripts/analyze_hammer_suggestions.py --no-simp-all --no-grind
20+
21+
# Output raw location:tactic pairs for further processing
22+
./scripts/analyze_hammer_suggestions.py --raw
23+
24+
By default, only simp_all and grind are enabled. Use --aesop and --canonical to
25+
enable additional tactics. The script analyzes which tactics can replace existing
26+
tactics at each location, both with and without the +suggestions flag.
27+
28+
Output includes:
29+
- Three categories based on +suggestions impact:
30+
* Regressions: worked without +suggestions but failed with +suggestions
31+
* Improvements: failed without +suggestions but worked with +suggestions
32+
* Neutral: same result with/without +suggestions
33+
- For each category, a binary table showing tactic combinations
34+
- Grid format showing binary patterns (1=tactic works, 0=doesn't)
35+
36+
Normalization:
37+
- Removes 'try ' prefix from tactics
38+
- Removes '?' modifier (e.g., 'simp_all?' -> 'simp_all')
39+
- Removes dagger symbol '✝' from tactic names
40+
"""
41+
42+
import argparse
43+
import re
44+
import subprocess
45+
from collections import defaultdict
46+
from itertools import product
47+
48+
49+
def normalize_tactic(tactic):
50+
"""Normalize tactic name, handling +suggestions variants and try prefix."""
51+
# Remove dagger symbol
52+
tactic = tactic.replace('✝', '')
53+
tactic = tactic.strip()
54+
55+
# Remove 'try ' prefix if present
56+
if tactic.startswith('try '):
57+
tactic = tactic[4:].strip()
58+
59+
# Remove '?' modifier (but keep it before +suggestions)
60+
# e.g., "simp_all? +suggestions" -> "simp_all +suggestions"
61+
tactic = tactic.replace('? +', ' +')
62+
# Remove trailing '?' if present
63+
if tactic.endswith('?'):
64+
tactic = tactic[:-1].strip()
65+
66+
return tactic
67+
68+
69+
def extract_suggestions(target):
70+
"""Run lake build and extract hammer suggestions."""
71+
result = subprocess.run(
72+
['lake', 'build', '--no-build', target],
73+
capture_output=True,
74+
text=True
75+
)
76+
77+
output = result.stdout + result.stderr
78+
79+
# Pattern to match info messages with "can be replaced with"
80+
pattern = r'info: ([^:]+):(\d+):(\d+):.*? can be replaced with `([^`]+)`'
81+
matches = re.findall(pattern, output, re.DOTALL)
82+
83+
# Group suggestions by location
84+
locations = defaultdict(set)
85+
for filepath, row, col, replacement in matches:
86+
location = f"{filepath}:{row}:{col}"
87+
normalized = normalize_tactic(replacement)
88+
locations[location].add(normalized)
89+
90+
return locations
91+
92+
93+
def output_raw(locations):
94+
"""Output raw location:tactic pairs."""
95+
for location, tactics in sorted(locations.items()):
96+
for tactic in sorted(tactics):
97+
print(f"{location}:{tactic}")
98+
99+
100+
def check_tactic_support(locations, tactics):
101+
"""
102+
For each location, determine which tactics from the given set are suggested.
103+
Returns a dict mapping location -> set of supported tactics.
104+
"""
105+
result = {}
106+
for location, suggestions in locations.items():
107+
supported = set()
108+
for tactic in tactics:
109+
if tactic in suggestions:
110+
supported.add(tactic)
111+
result[location] = frozenset(supported)
112+
113+
return result
114+
115+
116+
def count_subsets(tactic_support):
117+
"""
118+
Count how many locations support each subset of tactics.
119+
Returns a dict mapping frozenset -> count.
120+
"""
121+
counts = defaultdict(int)
122+
for supported in tactic_support.values():
123+
counts[supported] += 1
124+
125+
return counts
126+
127+
128+
def format_subset(subset, tactics):
129+
"""Format a subset as a binary string for display."""
130+
return ''.join('1' if t in subset else '0' for t in tactics)
131+
132+
133+
def print_table(counts, tactics, title):
134+
"""Print a formatted table of counts."""
135+
print(f"\n{title}")
136+
print("=" * len(title))
137+
138+
# Generate all possible subsets
139+
all_subsets = []
140+
for bits in product([0, 1], repeat=len(tactics)):
141+
subset = frozenset(t for t, b in zip(tactics, bits) if b)
142+
all_subsets.append(subset)
143+
144+
# Sort subsets by binary representation for consistent display
145+
all_subsets.sort(key=lambda s: format_subset(s, tactics), reverse=True)
146+
147+
# Create a mapping from binary pattern to count
148+
pattern_counts = {}
149+
for subset in all_subsets:
150+
pattern = format_subset(subset, tactics)
151+
pattern_counts[pattern] = counts.get(subset, 0)
152+
153+
# Print all combinations
154+
num_tactics = len(tactics)
155+
num_combinations = 2 ** num_tactics
156+
print(f"\nTactics: {', '.join(tactics)}")
157+
print("Binary pattern: " + "".join(f"{t[0]}" for t in tactics))
158+
print()
159+
160+
# Group into rows of 4 for readability
161+
for i in range(0, num_combinations, 4):
162+
for j in range(min(4, num_combinations - i)):
163+
combo_idx = num_combinations - 1 - (i + j) # Count down from all 1s
164+
bits = format(combo_idx, f'0{num_tactics}b')
165+
count = pattern_counts.get(bits, 0)
166+
subset_names = [tactics[k] for k in range(num_tactics) if bits[k] == '1']
167+
subset_str = '{' + ','.join(subset_names) + '}' if subset_names else '{}'
168+
print(f"{bits} {count:>6} {subset_str}")
169+
if i + 4 < num_combinations:
170+
print()
171+
172+
173+
def categorize_locations(locations, base_tactics):
174+
"""
175+
Categorize locations into three groups based on +suggestions impact:
176+
- regression: base works but +suggestions doesn't for at least one tactic
177+
- improvement: +suggestions works but base doesn't for at least one tactic (and no regressions)
178+
- neutral: same result for all tactics with/without +suggestions
179+
180+
Returns three dicts mapping location -> frozenset of tactics that worked.
181+
"""
182+
regression_locs = {}
183+
improvement_locs = {}
184+
neutral_locs = {}
185+
186+
for location, suggestions in locations.items():
187+
has_regression = False
188+
has_improvement = False
189+
190+
base_working = set()
191+
sugg_working = set()
192+
193+
for tactic in base_tactics:
194+
base_works = tactic in suggestions
195+
sugg_works = f'{tactic} +suggestions' in suggestions
196+
197+
if base_works:
198+
base_working.add(tactic)
199+
if sugg_works:
200+
sugg_working.add(f'{tactic} +suggestions')
201+
202+
if base_works and not sugg_works:
203+
has_regression = True
204+
if sugg_works and not base_works:
205+
has_improvement = True
206+
207+
if has_regression:
208+
regression_locs[location] = frozenset(base_working)
209+
elif has_improvement:
210+
improvement_locs[location] = frozenset(sugg_working)
211+
else:
212+
neutral_locs[location] = frozenset(base_working)
213+
214+
return regression_locs, improvement_locs, neutral_locs
215+
216+
217+
def output_analysis(locations, enabled_tactics):
218+
"""Output analysis tables."""
219+
print(f"Found {len(locations)} unique locations with suggestions")
220+
221+
# Filter to only enabled tactics
222+
base_tactics = [t for t in ['simp_all', 'aesop', 'canonical', 'grind'] if enabled_tactics[t]]
223+
224+
if not base_tactics:
225+
print("\nNo tactics enabled. Use --simp-all, --aesop, --canonical, or --grind to enable tactics.")
226+
return
227+
228+
print(f"\nAnalyzing tactics: {', '.join(base_tactics)}")
229+
230+
# Categorize locations
231+
regression_locs, improvement_locs, neutral_locs = categorize_locations(locations, base_tactics)
232+
233+
print(f"\nRegressions: {len(regression_locs)} locations")
234+
print(f"Improvements: {len(improvement_locs)} locations")
235+
print(f"Neutral: {len(neutral_locs)} locations")
236+
237+
# Create +suggestions tactic names for display
238+
suggestions_tactics = [f'{t} +suggestions' for t in base_tactics]
239+
240+
# Print regression table (base tactics that worked)
241+
if regression_locs:
242+
regression_counts = count_subsets(regression_locs)
243+
print_table(regression_counts, base_tactics, "REGRESSIONS (worked without +suggestions, failed with +suggestions)")
244+
245+
# Print improvement table (+suggestions tactics that worked)
246+
if improvement_locs:
247+
improvement_counts = count_subsets(improvement_locs)
248+
print_table(improvement_counts, suggestions_tactics, "IMPROVEMENTS (failed without +suggestions, worked with +suggestions)")
249+
250+
# Print neutral table (base tactics, since same as +suggestions)
251+
if neutral_locs:
252+
neutral_counts = count_subsets(neutral_locs)
253+
print_table(neutral_counts, base_tactics, "NEUTRAL (same result with/without +suggestions)")
254+
255+
256+
def main():
257+
parser = argparse.ArgumentParser(
258+
description='Analyze hammer suggestions from Lean build output'
259+
)
260+
parser.add_argument(
261+
'target',
262+
nargs='?',
263+
default='Mathlib',
264+
help='Build target to analyze (default: Mathlib)'
265+
)
266+
parser.add_argument(
267+
'--raw',
268+
action='store_true',
269+
help='Output raw location:tactic pairs instead of analysis tables'
270+
)
271+
272+
# Tactic enable/disable flags (simp_all and grind enabled by default)
273+
parser.add_argument(
274+
'--simp-all',
275+
action=argparse.BooleanOptionalAction,
276+
default=True,
277+
help='Include simp_all in analysis (default: enabled)'
278+
)
279+
parser.add_argument(
280+
'--aesop',
281+
action=argparse.BooleanOptionalAction,
282+
default=False,
283+
help='Include aesop in analysis (default: disabled)'
284+
)
285+
parser.add_argument(
286+
'--canonical',
287+
action=argparse.BooleanOptionalAction,
288+
default=False,
289+
help='Include canonical in analysis (default: disabled)'
290+
)
291+
parser.add_argument(
292+
'--grind',
293+
action=argparse.BooleanOptionalAction,
294+
default=True,
295+
help='Include grind in analysis (default: enabled)'
296+
)
297+
298+
args = parser.parse_args()
299+
300+
# Build enabled tactics dict (normalize argument names to tactic names)
301+
enabled_tactics = {
302+
'simp_all': args.simp_all,
303+
'aesop': args.aesop,
304+
'canonical': args.canonical,
305+
'grind': args.grind
306+
}
307+
308+
# Extract all suggestions
309+
locations = extract_suggestions(args.target)
310+
311+
if args.raw:
312+
output_raw(locations)
313+
else:
314+
print(f"Analyzing hammer suggestions for {args.target}...")
315+
print("=" * 70)
316+
print()
317+
output_analysis(locations, enabled_tactics)
318+
319+
320+
if __name__ == '__main__':
321+
main()

0 commit comments

Comments
 (0)