Skip to content

Commit 5d6af74

Browse files
committed
refactor: make labels type-safe and make max. number of allowed labels configurable
1 parent ca7d6f1 commit 5d6af74

File tree

1 file changed

+169
-88
lines changed

1 file changed

+169
-88
lines changed

scripts/autolabel.lean

Lines changed: 169 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ These are printed for testing purposes.
3131
to the PR specified. This requires the **GitHub CLI** `gh` to be installed!
3232
Example: `lake exe autolabel 10402` for PR https://github.com/leanprover-community/mathlib4/pull/10402.
3333
34-
For the time being, the script only adds a label if it finds a **single unique label**
35-
which would apply. If multiple labels are found, nothing happens.
34+
The script can add up to `MAX_LABELS` labels (defined below).
35+
If more than `MAX_LABELS` labels would be applicable, nothing will be added.
3636
3737
## Workflow
3838
@@ -55,95 +55,174 @@ open Lean System
5555

5656
namespace AutoLabel
5757

58+
/-- Maximal number of labels which can be added. If more are applicable, nothing will be added. -/
59+
def MAX_LABELS := 1
60+
61+
/-- Mathlib's Github topic labels -/
62+
inductive Label where
63+
| «t-algebra»
64+
| «t-algebraic-geometry»
65+
| «t-algebraic-topology»
66+
| «t-analysis»
67+
| «t-category-theory»
68+
| «t-combinatorics»
69+
| «t-computability»
70+
| «t-condensed»
71+
| «t-convex-geometry»
72+
| «t-data»
73+
| «t-differential-geometry»
74+
| «t-dynamics»
75+
| «t-euclidean-geometry»
76+
| «t-geometric-group-theory»
77+
| «t-group-theory»
78+
| «t-linter»
79+
| «t-logic»
80+
| «t-measure-probability»
81+
| «t-meta»
82+
| «t-number-theory»
83+
| «t-order»
84+
| «t-ring-theory»
85+
| «t-set-theory»
86+
| «t-topology»
87+
| «CI»
88+
| «IMO»
89+
| «dependency-bump»
90+
deriving BEq, Hashable, Repr
91+
92+
/--
93+
Array of all topic labels which are used in Mathlib.
94+
95+
Note: Since Lean does not know reflection, this needs to be updated manually!
96+
-/
97+
def mathlibLabels : Array Label := #[
98+
t-algebra», .«t-algebraic-geometry», .«t-algebraic-topology», .«t-analysis»,
99+
t-category-theory», .«t-combinatorics», .«t-computability», .«t-condensed»,
100+
t-convex-geometry», .«t-data», .«t-differential-geometry», .«t-dynamics»,
101+
t-euclidean-geometry», .«t-geometric-group-theory», .«t-group-theory», .«t-linter»,
102+
t-logic», .«t-measure-probability», .«t-meta», .«t-number-theory», .«t-order»,
103+
t-ring-theory», .«t-set-theory», .«t-topology», .«CI», .«IMO»,
104+
dependency-bump»
105+
]
106+
107+
-- Note: Since Lean does not know reflection, this needs to be updated manually!
108+
def Label.toString : Label → String
109+
| .«t-algebra» => "t-algebra"
110+
| .«t-algebraic-geometry» => "t-algebraic-geometry"
111+
| .«t-algebraic-topology» => "t-algebraic-topology"
112+
| .«t-analysis» => "t-analysis"
113+
| .«t-category-theory» => "t-category-theory"
114+
| .«t-combinatorics» => "t-combinatorics"
115+
| .«t-computability» => "t-computability"
116+
| .«t-condensed» => "t-condensed"
117+
| .«t-convex-geometry» => "t-convex-geometry"
118+
| .«t-data» => "t-data"
119+
| .«t-differential-geometry» => "t-differential-geometry"
120+
| .«t-dynamics» => "t-dynamics"
121+
| .«t-euclidean-geometry» => "t-euclidean-geometry"
122+
| .«t-geometric-group-theory» => "t-geometric-group-theory"
123+
| .«t-group-theory» => "t-group-theory"
124+
| .«t-linter» => "t-linter"
125+
| .«t-logic» => "t-logic"
126+
| .«t-measure-probability» => "t-measure-probability"
127+
| .«t-meta» => "t-meta"
128+
| .«t-number-theory» => "t-number-theory"
129+
| .«t-order» => "t-order"
130+
| .«t-ring-theory» => "t-ring-theory"
131+
| .«t-set-theory» => "t-set-theory"
132+
| .«t-topology» => "t-topology"
133+
| .«CI» => "CI"
134+
| .«IMO» => "IMO"
135+
| .«dependency-bump» => "dependency-bump"
136+
137+
instance : ToString Label where
138+
toString := Label.toString
139+
58140
/--
59-
A `Label` consists of the
60-
* The `label` field is the actual GitHub label name.
141+
A `LabelData` consists of the
61142
* The `dirs` field is the array of all "root paths" such that a modification in a file contained
62143
in one of these paths should be labelled with `label`.
63144
* The `exclusions` field is the array of all "root paths" that are excluded, among the
64145
ones that start with the ones in `dirs`.
65146
Any modifications to a file in an excluded path is ignored for the purposes of labelling.
66147
-/
67-
structure Label where
68-
/-- The label name as it appears on GitHub -/
69-
label : String
148+
structure LabelData (label : Label) where
70149
/-- Array of paths which fall under this label. e.g. `"Mathlib" / "Algebra"`.
71150
72151
For a label of the form `t-set-theory` this defaults to `#["Mathlib" / "SetTheory"]`. -/
73-
dirs : Array FilePath := if label.startsWith "t-" then
74-
#["Mathlib" / ("".intercalate (label.splitOn "-" |>.drop 1 |>.map .capitalize))]
152+
dirs : Array FilePath := if label.toString.startsWith "t-" then
153+
#["Mathlib" / ("".intercalate (label.toString.splitOn "-" |>.drop 1 |>.map .capitalize))]
75154
else #[]
76155
/-- Array of paths which should be excluded.
77156
Any modifications to a file in an excluded path are ignored for the purposes of labelling. -/
78157
exclusions : Array FilePath := #[]
158+
/-- Labels which are "lower" in the Mathlib import order. These labels will not be added
159+
alongside the label. For example any PR to `t-ring-theory` might modify files from `t-algebra`
160+
but should only get the former label -/
161+
dependencies : Array Label := #[]
79162
deriving BEq, Hashable
80163

81164
/--
82-
Mathlib labels and their corresponding folders. Add new labels and folders here!
165+
Mathlib labels and their corresponding folders.
83166
-/
84-
def mathlibLabels : Array Label := #[
85-
{ label := "t-algebra",
167+
def mathlibLabelData : (l : Label) → LabelData l
168+
| .«t-algebra» => {
86169
dirs := #[
87170
"Mathlib" / "Algebra",
88171
"Mathlib" / "FieldTheory",
89172
"Mathlib" / "RepresentationTheory",
90-
"Mathlib" / "LinearAlgebra"] },
91-
{ label := "t-algebraic-geometry",
173+
"Mathlib" / "LinearAlgebra"] }
174+
| .«t-algebraic-geometry» => {
92175
dirs := #[
93176
"Mathlib" / "AlgebraicGeometry",
94-
"Mathlib" / "Geometry" / "RingedSpace"] },
95-
{ label := "t-algebraic-topology",
96-
dirs := #["Mathlib" / "AlgebraicTopology"] },
97-
{ label := "t-analysis" },
98-
{ label := "t-category-theory" },
99-
{ label := "t-combinatorics" },
100-
{ label := "t-computability" },
101-
{ label := "t-condensed" },
102-
{ label := "t-convex-geometry",
103-
dirs := #["Mathlib" / "Geometry" / "Convex"] },
104-
{ label := "t-data"
177+
"Mathlib" / "Geometry" / "RingedSpace"] }
178+
| .«t-algebraic-topology» => {}
179+
| .«t-analysis» => {}
180+
| .«t-category-theory» => {}
181+
| .«t-combinatorics» => {}
182+
| .«t-computability» => {}
183+
| .«t-condensed» => {}
184+
| .«t-convex-geometry» => {
185+
dirs := #["Mathlib" / "Geometry" / "Convex"] }
186+
| .«t-data» => {
105187
dirs := #[
106188
"Mathlib" / "Control",
107-
"Mathlib" / "Data",] },
108-
{ label := "t-differential-geometry",
109-
dirs := #["Mathlib" / "Geometry" / "Manifold"] },
110-
{ label := "t-dynamics" },
111-
{ label := "t-euclidean-geometry",
112-
dirs := #["Mathlib" / "Geometry" / "Euclidean"] },
113-
{ label := "t-geometric-group-theory",
114-
dirs := #["Mathlib" / "Geometry" / "Group"] },
115-
{ label := "t-group-theory",
116-
dirs := #["Mathlib" / "GroupTheory"] },
117-
{ label := "t-linter",
189+
"Mathlib" / "Data"] }
190+
| .«t-differential-geometry» => {
191+
dirs := #["Mathlib" / "Geometry" / "Manifold"] }
192+
| .«t-dynamics» => {}
193+
| .«t-euclidean-geometry» => {
194+
dirs := #["Mathlib" / "Geometry" / "Euclidean"] }
195+
| .«t-geometric-group-theory» => {
196+
dirs := #["Mathlib" / "Geometry" / "Group"] }
197+
| .«t-group-theory» => {}
198+
| .«t-linter» => {
118199
dirs := #[
119200
"Mathlib" / "Tactic" / "Linter",
120201
"scripts" / "lint-style.lean",
121202
"scripts" / "lint-style.py",
122-
] },
123-
{ label := "t-logic",
203+
] }
204+
| .«t-logic» => {
124205
dirs := #[
125206
"Mathlib" / "Logic",
126-
"Mathlib" / "ModelTheory"] },
127-
{ label := "t-measure-probability",
207+
"Mathlib" / "ModelTheory"] }
208+
| .«t-measure-probability» => {
128209
dirs := #[
129210
"Mathlib" / "MeasureTheory",
130211
"Mathlib" / "Probability",
131-
"Mathlib" / "InformationTheory"] },
132-
{ label := "t-meta",
212+
"Mathlib" / "InformationTheory"] }
213+
| .«t-meta» => {
133214
dirs := #[
134215
"Mathlib" / "Lean",
135216
"Mathlib" / "Mathport",
136217
"Mathlib" / "Tactic",
137218
"Mathlib" / "Util"],
138-
exclusions := #["Mathlib" / "Tactic" / "Linter"] },
139-
{ label := "t-number-theory" },
140-
{ label := "t-order" },
141-
{ label := "t-ring-theory",
142-
dirs := #["Mathlib" / "RingTheory"] },
143-
{ label := "t-set-theory" },
144-
{ label := "t-topology",
145-
dirs := #["Mathlib" / "Topology"] },
146-
{ label := "CI",
219+
exclusions := #["Mathlib" / "Tactic" / "Linter"] }
220+
| .«t-number-theory» => {}
221+
| .«t-order» => {}
222+
| .«t-ring-theory» => {}
223+
| .«t-set-theory» => {}
224+
| .«t-topology» => {}
225+
| .«CI» => {
147226
dirs := #[
148227
".github",
149228
"scripts",
@@ -155,11 +234,11 @@ def mathlibLabels : Array Label := #[
155234
"scripts" / "nolints.json",
156235
"scripts" / "nolints-style.txt",
157236
"scripts" / "nolints_prime_decls.txt",
158-
] },
159-
{ label := "IMO",
160-
dirs := #["Archive" / "Imo"] },
161-
{ label := "dependency-bump",
162-
dirs := #["lake-manifest.json"] } ]
237+
] }
238+
| .«IMO» => {
239+
dirs := #["Archive" / "Imo"] }
240+
| .«dependency-bump» => {
241+
dirs := #["lake-manifest.json"] }
163242

164243
/-- Exceptions inside `Mathlib/` which are not covered by any label. -/
165244
def mathlibUnlabelled : Array FilePath := #[
@@ -174,20 +253,21 @@ def _root_.System.FilePath.isPrefixOf (dir path : FilePath) : Bool :=
174253
(dir / "").normalize.toString.isPrefixOf (path / "").normalize.toString
175254

176255
/--
177-
Return all names of labels in `mathlibLabels` which match
256+
Return all labels in `mathlibLabels` which match
178257
at least one of the `files`.
179258
180259
* `files`: array of relative paths starting from the mathlib root directory.
181260
-/
182-
def getMatchingLabels (files : Array FilePath) : Array String :=
261+
def getMatchingLabels (files : Array FilePath) : Array Label :=
183262
let applicable := mathlibLabels.filter fun label ↦
184263
-- first exclude all files the label excludes,
185264
-- then see if any file remains included by the label
265+
let data := mathlibLabelData label
186266
let notExcludedFiles := files.filter fun file ↦
187-
label.exclusions.all (!·.isPrefixOf file)
188-
label.dirs.any (fun dir ↦ notExcludedFiles.any (dir.isPrefixOf ·))
189-
-- return sorted list of label names
190-
applicable.map (·.label) |>.qsort (· < ·)
267+
data.exclusions.all (!·.isPrefixOf file)
268+
data.dirs.any (fun dir ↦ notExcludedFiles.any (dir.isPrefixOf ·))
269+
-- return sorted list of labels
270+
applicable |>.qsort (·.toString < ·.toString)
191271

192272
/-!
193273
Testing the functionality of the declarations defined in this script
@@ -202,22 +282,22 @@ section Tests
202282

203283
#guard getMatchingLabels #[] == #[]
204284
-- Test default value for `label.dirs` works
205-
#guard getMatchingLabels #["Mathlib" / "SetTheory" / "ZFC"] == #["t-set-theory"]
285+
#guard getMatchingLabels #["Mathlib" / "SetTheory" / "ZFC"] == #[t-set-theory»]
206286
-- Test exclusion
207-
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Abel.lean"] == #["t-meta"]
208-
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Linter" / "Lint.lean"] == #["t-linter"]
287+
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Abel.lean"] == #[t-meta»]
288+
#guard getMatchingLabels #["Mathlib" / "Tactic"/ "Linter" / "Lint.lean"] == #[t-linter»]
209289
#guard getMatchingLabels #[
210290
"Mathlib" / "Tactic"/ "Linter" / "Lint.lean",
211-
"Mathlib" / "Tactic" / "Abel.lean" ] == #["t-linter", "t-meta"]
291+
"Mathlib" / "Tactic" / "Abel.lean" ] == #[t-linter», .«t-meta»]
212292

213293
-- Test targeting a file instead of a directory
214-
#guard getMatchingLabels #["lake-manifest.json"] == #["dependency-bump"]
294+
#guard getMatchingLabels #["lake-manifest.json"] == #[dependency-bump»]
215295

216296
-- Test linting of specific changes touching linting and CI.
217-
#guard getMatchingLabels #["scripts" / "add_deprecations.sh"] == #["CI"]
218-
#guard getMatchingLabels #["scripts" / "lint-style.lean"] == #["t-linter"]
297+
#guard getMatchingLabels #["scripts" / "add_deprecations.sh"] == #[CI»]
298+
#guard getMatchingLabels #["scripts" / "lint-style.lean"] == #[t-linter»]
219299
#guard getMatchingLabels #["Mathlib" / "Tactic" / "Linter" / "TextBased.lean",
220-
"scripts" / "lint-style.lean", "scripts" / "lint-style.py"] == #["t-linter"]
300+
"scripts" / "lint-style.lean", "scripts" / "lint-style.py"] == #[t-linter»]
221301
#guard getMatchingLabels #["scripts" / "noshake.json"] == #[]
222302

223303
/-- Testing function to ensure the labels defined in `mathlibLabels` cover all
@@ -282,24 +362,25 @@ unsafe def main (args : List String): IO UInt32 := do
282362
return 1
283363
let prNumber? := args[0]?
284364

285-
-- test: validate that all paths in `mathlibLabels` actually exist
365+
-- test: validate that all paths in `mathlibLabelData` actually exist
286366
let mut valid := true
287367
for label in mathlibLabels do
288-
for dir in label.dirs do
368+
let data := mathlibLabelData label
369+
for dir in data.dirs do
289370
unless ← FilePath.pathExists dir do
290371
-- print github annotation error
291372
println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
292-
s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
293-
s!"directory '{dir}' does not exist but is included by label '{label.label}'. \
294-
Please update `{ ``AutoLabel.mathlibLabels }`!"
373+
s!"Misformatted `{ ``AutoLabel.mathlibLabelData }`"
374+
s!"directory '{dir}' does not exist but is included by label '{label}'. \
375+
Please update `{ ``AutoLabel.mathlibLabelData }`!"
295376
valid := false
296-
for dir in label.exclusions do
377+
for dir in data.exclusions do
297378
unless ← FilePath.pathExists dir do
298379
-- print github annotation error
299380
println <| AutoLabel.githubAnnotation "error" "scripts/autolabel.lean"
300-
s!"Misformatted `{ ``AutoLabel.mathlibLabels }`"
301-
s!"directory '{dir}' does not exist but is excluded by label '{label.label}'. \
302-
Please update `{ ``AutoLabel.mathlibLabels }`!"
381+
s!"Misformatted `{ ``AutoLabel.mathlibLabelData }`"
382+
s!"directory '{dir}' does not exist but is excluded by label '{label}'. \
383+
Please update `{ ``AutoLabel.mathlibLabelData }`!"
303384
valid := false
304385
unless valid do
305386
return 2
@@ -311,7 +392,7 @@ unsafe def main (args : List String): IO UInt32 := do
311392
-- note: only emitting a warning because the workflow is only triggered on the first commit
312393
-- of a PR and could therefore lead to unexpected behaviour if a folder was created later.
313394
println <| AutoLabel.githubAnnotation "warning" "scripts/autolabel.lean"
314-
s!"Incomplete `{ ``AutoLabel.mathlibLabels }`"
395+
s!"Incomplete `{ ``AutoLabel.mathlibLabelData }`"
315396
s!"the following paths inside `Mathlib/` are not covered \
316397
by any label: {notMatchedPaths} Please modify `AutoLabel.mathlibLabels` accordingly!"
317398
-- return 3
@@ -326,32 +407,32 @@ unsafe def main (args : List String): IO UInt32 := do
326407

327408
-- find labels covering the modified files
328409
let labels := getMatchingLabels modifiedFiles
329-
330410
println s!"::notice::Applicable labels: {labels}"
331411

332412
match labels with
333413
| #[] =>
334414
println s!"::warning::no label to add"
335-
| #[label] =>
415+
| newLabels =>
416+
if newLabels.size > MAX_LABELS then
417+
println s!"::notice::not adding more than {MAX_LABELS} labels: {labels}"
418+
return 0
336419
match prNumber? with
337420
| some n =>
338421
let labelsPresent ← IO.Process.run {
339422
cmd := "gh"
340423
args := #["pr", "view", n, "--json", "labels", "--jq", ".labels .[] .name"]}
341424
let labels := labelsPresent.splitToList (· == '\n')
342-
let autoLabels := mathlibLabels.map (·.label)
425+
let autoLabels := mathlibLabels.map (·.toString)
343426
match labels.filter autoLabels.contains with
344427
| [] => -- if the PR does not have a label that this script could add, then we add a label
345428
let _ ← IO.Process.run {
346429
cmd := "gh",
347-
args := #["pr", "edit", n, "--add-label", label] }
348-
println s!"::notice::added label: {label}"
430+
args := #["pr", "edit", n, "--add-label", s!"\"{",".intercalate (newLabels.map Label.toString).toList}\""] }
431+
println s!"::notice::added labels: {newLabels}"
349432
| t_labels_already_present =>
350-
println s!"::notice::Did not add label '{label}', since {t_labels_already_present} \
433+
println s!"::notice::Did not add labels '{newLabels}', since {t_labels_already_present} \
351434
were already present"
352435
| none =>
353436
println s!"::warning::no PR-number provided, not adding labels. \
354437
(call `lake exe autolabel 150602` to add the labels to PR `150602`)"
355-
| _ =>
356-
println s!"::notice::not adding multiple labels: {labels}"
357438
return 0

0 commit comments

Comments
 (0)