Skip to content

Commit 2b94838

Browse files
authored
chore: new generator (#32)
1 parent dda27aa commit 2b94838

File tree

12 files changed

+53956
-19
lines changed

12 files changed

+53956
-19
lines changed

.github/workflows/ci.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ jobs:
3636
git diff --exit-code go.mod
3737
git diff --exit-code go.sum
3838
39+
- name: Check generated files
40+
run: |
41+
make generate
42+
git diff --exit-code words.go
43+
git diff --exit-code words_uk.go
44+
git diff --exit-code words_us.go
45+
3946
- name: golangci-lint
4047
uses: golangci/golangci-lint-action@v7
4148
with:

CONTRIBUTING.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Contributing
2+
3+
The files `words.go`, `words_uk.go`, and `words_us.go` are generated and must never be edited by hand.
4+
5+
## Adding a word
6+
7+
Misspell is neither a complete spell-checking program nor a grammar checker.
8+
It is a tool to correct commonly misspelled English words.
9+
10+
The list of words must contain only common mistakes.
11+
12+
Before adding a word, you should have information about the misspelling frequency.
13+
14+
- [ ] more than 15k occurrences on GitHub (this limit is arbitrary and can evolve)
15+
- [ ] doesn't exist in Wiktionary (as a modern form)
16+
- [ ] doesn't exist in the Cambridge Dictionary (as a modern form)
17+
- [ ] doesn't exist in the Oxford Dictionary (as a modern form)
18+
19+
If all criteria are met, a word can be added to the list of misspellings.
20+
21+
The word should be added to one of the following files.
22+
23+
- `internal/gen/sources/main.json`: common words.
24+
- `internal/gen/sources/uk.json`: UK only words.
25+
- `internal/gen/sources/us.json`: US only words.
26+
27+
The target `make generate` will generate the Go files.
28+
29+
The PR description must provide all the information (links) about the misspelling frequency.

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CONTAINER=golangci/misspell
22

3-
default: lint test build
3+
default: generate lint test build
44

55
install: ## install misspell into GOPATH/bin
66
go install ./cmd/misspell
@@ -14,6 +14,9 @@ test: ## run all tests
1414
lint: ## run linter
1515
golangci-lint run
1616

17+
generate:
18+
go run ./internal/gen/
19+
1720
# the grep in line 2 is to remove misspellings in the spelling dictionary
1821
# that trigger false positives!!
1922
falsepositives: /scowl-wl

internal/gen/dict.go.tmpl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Code generated by 'internal/gen'. DO NOT EDIT.

package misspell

// Dict{{ .Name }} {{ .Comment }}
var Dict{{ .Name }} = []string{
{{- range $index, $tuple := .Tuples }}
{{- /* %q emits an escaped Go string literal, so a word containing a quote or a backslash still yields valid Go. */}}
	{{ printf "%q" $tuple.Typo }}, {{ printf "%q" $tuple.Correction }},
{{- end }}
}

internal/gen/gen.go

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"cmp"
6+
_ "embed"
7+
"encoding/json"
8+
"go/format"
9+
"log"
10+
"os"
11+
"slices"
12+
"text/template"
13+
)
14+
15+
//go:embed dict.go.tmpl
16+
var dictTemplate string
17+
18+
// Tuple pairs a misspelled word (Typo) with the spelling that replaces it
// (Correction).
type Tuple struct {
	Typo, Correction string
}
22+
23+
// Info describes one dictionary to generate.
//
// Name and Comment are handed to the template when the Go file is rendered;
// Path is the JSON source file the words are read from.
type Info struct {
	Name, Comment, Path string
}
28+
29+
// regenerate words Go files from JSON files.
30+
func main() {
31+
dictionaries := map[string]Info{
32+
"words.go": {
33+
Name: "Main",
34+
Comment: "is the main rule set, not including locale-specific spellings",
35+
Path: "internal/gen/sources/main.json",
36+
},
37+
"words_uk.go": {
38+
Name: "British",
39+
Comment: "converts US spellings to UK spellings",
40+
Path: "internal/gen/sources/uk.json",
41+
},
42+
"words_us.go": {
43+
Name: "American",
44+
Comment: "converts UK spellings to US spellings",
45+
Path: "internal/gen/sources/us.json",
46+
},
47+
}
48+
49+
for dest, src := range dictionaries {
50+
err := generate(src, dest)
51+
if err != nil {
52+
log.Fatal(err)
53+
}
54+
}
55+
}
56+
57+
func generate(src Info, dest string) error {
58+
data, err := read(src.Path)
59+
if err != nil {
60+
return err
61+
}
62+
63+
tuples := toTuples(data)
64+
65+
return write(tuples, src, dest)
66+
}
67+
68+
func toTuples(data map[string][]string) []Tuple {
69+
var tuples []Tuple
70+
71+
for c, typos := range data {
72+
for _, typo := range typos {
73+
tuples = append(tuples, Tuple{Typo: typo, Correction: c})
74+
}
75+
}
76+
77+
return tuples
78+
}
79+
80+
// read opens the file at src and decodes its JSON content into a map from
// string keys to string slices. It returns the open or decode error as-is.
func read(src string) (map[string][]string, error) {
	f, err := os.Open(src)
	if err != nil {
		return nil, err
	}

	// The handle is read-only, so the Close error is deliberately dropped.
	defer func() { _ = f.Close() }()

	entries := make(map[string][]string)

	decoder := json.NewDecoder(f)
	if err := decoder.Decode(&entries); err != nil {
		return nil, err
	}

	return entries, nil
}
99+
100+
func write(tuples []Tuple, src Info, dest string) error {
101+
slices.SortStableFunc(tuples, func(a, b Tuple) int {
102+
if len(a.Typo) == len(b.Typo) {
103+
// if words are same size, then use
104+
// normal alphabetical order
105+
return cmp.Compare(a.Typo, b.Typo)
106+
}
107+
// INVERTED -- biggest words first
108+
return cmp.Compare(len(b.Typo), len(a.Typo))
109+
})
110+
111+
tmpl, err := template.New("words").Parse(dictTemplate)
112+
if err != nil {
113+
return err
114+
}
115+
116+
var buf bytes.Buffer
117+
118+
err = tmpl.Execute(&buf, map[string]any{
119+
"Name": src.Name,
120+
"Comment": src.Comment,
121+
"Tuples": tuples,
122+
})
123+
if err != nil {
124+
return err
125+
}
126+
127+
source, err := format.Source(buf.Bytes())
128+
if err != nil {
129+
return err
130+
}
131+
132+
words, err := os.Create(dest)
133+
if err != nil {
134+
return err
135+
}
136+
137+
defer func() {
138+
_ = words.Close()
139+
}()
140+
141+
_, err = words.Write(source)
142+
if err != nil {
143+
return err
144+
}
145+
146+
return nil
147+
}

0 commit comments

Comments
 (0)