Skip to content

Commit 8dcc59b

Browse files
authored
fix: create Transformers on demand (#90)
Signed-off-by: Ian Lewis <ian@ianlewis.org>
1 parent a3b74f9 commit 8dcc59b

File tree

5 files changed

+64
-42
lines changed

5 files changed

+64
-42
lines changed

CHANGELOG.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12-
- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25))
13-
- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2))
14-
- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21))
12+
- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25)).
13+
- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2)).
14+
- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21)).
1515

1616
### Changed
1717

1818
- The minimum supported Go version is now 1.23.
19+
- `stardict.Open` and `stardict.OpenAll` now take an `options` argument which allows for specifying options for opening dictionaries ([#87](https://github.com/ianlewis/go-stardict/issues/87)).
20+
- `stardict.idx.Options.Folder` is now a constructor `func() transform.Transformer` rather than a static `golang.org/x/text/transform.Transformer` value ([#87](https://github.com/ianlewis/go-stardict/issues/87)).
1921

2022
## [0.1.0] - 2024-11-04
2123

idx/idx.go

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,19 @@ func (w *foldedWord) String() string {
6363

6464
// Options are options for the idx data.
6565
type Options struct {
66-
// Folder is the transformer that performs folding on index entries.
67-
Folder transform.Transformer
66+
// Folder returns a [transform.Transformer] that performs folding (e.g.
67+
// case folding, whitespace folding, etc.) on index entries.
68+
Folder func() transform.Transformer
6869

6970
// ScannerOptions are the options to use when reading the .idx file.
7071
ScannerOptions *ScannerOptions
7172
}
7273

7374
// DefaultOptions is the default options for an Idx.
7475
var DefaultOptions = &Options{
75-
Folder: transform.Nop,
76+
Folder: func() transform.Transformer {
77+
return transform.Nop
78+
},
7679
ScannerOptions: &ScannerOptions{
7780
OffsetBits: 32,
7881
},
@@ -87,7 +90,7 @@ type Idx struct {
8790
index *index.Index[*foldedWord]
8891

8992
// foldTransformer returns a transformer that performs folding on text.
90-
foldTransformer transform.Transformer
93+
foldTransformer func() transform.Transformer
9194
}
9295

9396
// New returns a new in-memory index.
@@ -117,7 +120,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err
117120
var words []*foldedWord
118121
for s.Scan() {
119122
word := s.Word()
120-
folded, _, err := transform.String(idx.foldTransformer, word.Word)
123+
folded, _, err := transform.String(idx.foldTransformer(), word.Word)
121124
if err != nil {
122125
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
123126
}
@@ -139,7 +142,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err
139142
}
140143
for synScanner.Scan() {
141144
word := synScanner.Word()
142-
folded, _, err := transform.String(idx.foldTransformer, word.Word)
145+
folded, _, err := transform.String(idx.foldTransformer(), word.Word)
143146
if err != nil {
144147
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
145148
}
@@ -304,7 +307,7 @@ func (idx *Idx) foldGlob(q string) (string, error) {
304307
if syntax.Special(c) {
305308
if !isSpecial {
306309
if b.Len() > 0 {
307-
w, _, err := transform.String(idx.foldTransformer, b.String())
310+
w, _, err := transform.String(idx.foldTransformer(), b.String())
308311
if err != nil {
309312
return "", fmt.Errorf("folding query %q: %w", q, err)
310313
}
@@ -330,7 +333,7 @@ func (idx *Idx) foldGlob(q string) (string, error) {
330333
w := b.String()
331334
if !isSpecial {
332335
// fold the word
333-
fw, _, err := transform.String(idx.foldTransformer, w)
336+
fw, _, err := transform.String(idx.foldTransformer(), w)
334337
if err != nil {
335338
return "", fmt.Errorf("folding query %q: %w", q, err)
336339
}

idx/idx_test.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"github.com/google/go-cmp/cmp"
2323
"github.com/google/go-cmp/cmp/cmpopts"
2424
"golang.org/x/text/cases"
25+
"golang.org/x/text/transform"
2526

2627
"github.com/ianlewis/go-stardict/idx"
2728
"github.com/ianlewis/go-stardict/internal/testutil"
@@ -222,7 +223,9 @@ func TestIdx_Search(t *testing.T) {
222223
},
223224
idxoffsetbits: 32,
224225
options: &idx.Options{
225-
Folder: cases.Fold(),
226+
Folder: func() transform.Transformer {
227+
return cases.Fold()
228+
},
226229
},
227230

228231
expected: []*idx.Word{
@@ -261,7 +264,9 @@ func TestIdx_Search(t *testing.T) {
261264
},
262265
idxoffsetbits: 32,
263266
options: &idx.Options{
264-
Folder: cases.Fold(),
267+
Folder: func() transform.Transformer {
268+
return cases.Fold()
269+
},
265270
},
266271

267272
// NOTE: The returned index word is the value in the index
@@ -300,7 +305,9 @@ func TestIdx_Search(t *testing.T) {
300305
},
301306
idxoffsetbits: 32,
302307
options: &idx.Options{
303-
Folder: cases.Fold(),
308+
Folder: func() transform.Transformer {
309+
return cases.Fold()
310+
},
304311
},
305312

306313
expected: nil,
@@ -331,7 +338,9 @@ func TestIdx_Search(t *testing.T) {
331338
},
332339
idxoffsetbits: 32,
333340
options: &idx.Options{
334-
Folder: cases.Fold(),
341+
Folder: func() transform.Transformer {
342+
return cases.Fold()
343+
},
335344
},
336345

337346
expected: nil,

stardict.go

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,14 @@ type Stardict struct {
6262
description string
6363
sametypesequence []dict.DataType
6464

65-
folder transform.Transformer
65+
folder func() transform.Transformer
6666
}
6767

6868
// Options are options for the Stardict dictionary.
6969
type Options struct {
70-
// Folder performs folding (e.g. case folding, whitespace folding, etc.) on dictionary entries.
71-
Folder transform.Transformer
70+
// Folder returns a [transform.Transformer] that performs folding (e.g.
71+
// case folding, whitespace folding, etc.) on dictionary entries.
72+
Folder func() transform.Transformer
7273
}
7374

7475
var (
@@ -109,23 +110,25 @@ func OpenAll(path string, options *Options) ([]*Stardict, []error) {
109110
func Open(path string, options *Options) (*Stardict, error) {
110111
if options == nil {
111112
options = &Options{
112-
Folder: transform.Chain(
113-
// Unicode Normalization Form D (Canonical Decomposition.
114-
norm.NFD,
115-
// Perform case folding.
116-
cases.Fold(),
117-
// Perform whitespace folding.
118-
&folding.WhitespaceFolder{},
119-
// Remove Non-spacing marks ([, ] {, }, etc.).
120-
runes.Remove(runes.In(unicode.Mn)),
121-
// Remove punctuation.
122-
runes.Remove(runes.In(unicode.P)),
123-
// Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition)
124-
// NOTE: Case folding does not normalize the input and may not
125-
// preserve a normal form. Canonical Decomposition is thus necessary
126-
// to be performed a second time.
127-
norm.NFC,
128-
),
113+
Folder: func() transform.Transformer {
114+
return transform.Chain(
115+
// Unicode Normalization Form D (Canonical Decomposition).
116+
norm.NFD,
117+
// Perform case folding.
118+
cases.Fold(),
119+
// Perform whitespace folding.
120+
&folding.WhitespaceFolder{},
121+
// Remove non-spacing marks (combining diacritics such as accents).
122+
runes.Remove(runes.In(unicode.Mn)),
123+
// Remove punctuation.
124+
runes.Remove(runes.In(unicode.P)),
125+
// Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition)
126+
// NOTE: Case folding does not normalize the input and may not
127+
// preserve a normal form. Canonical Decomposition must therefore
128+
// be performed a second time.
129+
norm.NFC,
130+
)
131+
},
129132
}
130133
}
131134

@@ -134,7 +137,9 @@ func Open(path string, options *Options) (*Stardict, error) {
134137
idxoffsetbits: 32,
135138
}
136139

137-
s.folder = transform.Nop
140+
s.folder = func() transform.Transformer {
141+
return transform.Nop
142+
}
138143
if options.Folder != nil {
139144
s.folder = options.Folder
140145
}

syn/syn.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,16 @@ func (w *foldedWord) String() string {
4848

4949
// Options are options for the syn data.
5050
type Options struct {
51-
// Folder is the transformer that performs folding on index entries.
52-
Folder transform.Transformer
51+
// Folder returns a [transform.Transformer] that performs folding (e.g.
52+
// case folding, whitespace folding, etc.) on index entries.
53+
Folder func() transform.Transformer
5354
}
5455

5556
// DefaultOptions is the default options for a Syn.
5657
var DefaultOptions = &Options{
57-
Folder: transform.Nop,
58+
Folder: func() transform.Transformer {
59+
return transform.Nop
60+
},
5861
}
5962

6063
// Syn is the synonym index. It is largely a map of synonym words to related
@@ -64,7 +67,7 @@ type Syn struct {
6467
index *index.Index[*foldedWord]
6568

6669
// foldTransformer returns a transformer that performs folding on text.
67-
foldTransformer transform.Transformer
70+
foldTransformer func() transform.Transformer
6871
}
6972

7073
// New returns a new Syn by reading the data from r.
@@ -89,7 +92,7 @@ func New(r io.ReadCloser, options *Options) (*Syn, error) {
8992
var words []*foldedWord
9093
for s.Scan() {
9194
word := s.Word()
92-
folded, _, err := transform.String(syn.foldTransformer, word.Word)
95+
folded, _, err := transform.String(syn.foldTransformer(), word.Word)
9396
if err != nil {
9497
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
9598
}
@@ -168,7 +171,7 @@ func Open(ifoPath string) (*os.File, error) {
168171

169172
// Search performs a query of the index and returns matching words.
170173
func (syn *Syn) Search(query string) ([]*Word, error) {
171-
foldedQuery, _, err := transform.String(syn.foldTransformer, query)
174+
foldedQuery, _, err := transform.String(syn.foldTransformer(), query)
172175
if err != nil {
173176
return nil, fmt.Errorf("folding query %q: %w", query, err)
174177
}

0 commit comments

Comments
 (0)