diff --git a/CHANGELOG.md b/CHANGELOG.md index 88d7540..80b1700 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25)) -- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2)) -- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21)) +- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25)). +- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2)). +- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21)). ### Changed - The minimum supported Go version is now 1.23. +- `stardict.Open` and `stardict.OpenAll` now take an `options` argument which allows for specifying options for opening dictionaries ([#87](https://github.com/ianlewis/go-stardict/issues/87)). +- `stardict.idx.Options.Folder` is now a constructor `func() transform.Transformer` rather than a static `golang.org/x/text/transform.Transformer` value ([#87](https://github.com/ianlewis/go-stardict/issues/87)). ## [0.1.0] - 2024-11-04 diff --git a/idx/idx.go b/idx/idx.go index 3e07adc..845e45f 100644 --- a/idx/idx.go +++ b/idx/idx.go @@ -63,8 +63,9 @@ func (w *foldedWord) String() string { // Options are options for the idx data. type Options struct { - // Folder is the transformer that performs folding on index entries. - Folder transform.Transformer + // Folder returns a [transform.Transformer] that performs folding (e.g. + // case folding, whitespace folding, etc.) on index entries. + Folder func() transform.Transformer // ScannerOptions are the options to use when reading the .idx file. 
ScannerOptions *ScannerOptions @@ -72,7 +73,9 @@ type Options struct { // DefaultOptions is the default options for an Idx. var DefaultOptions = &Options{ - Folder: transform.Nop, + Folder: func() transform.Transformer { + return transform.Nop + }, ScannerOptions: &ScannerOptions{ OffsetBits: 32, }, @@ -87,7 +90,7 @@ type Idx struct { index *index.Index[*foldedWord] // foldTransformer performs folding on text. - foldTransformer transform.Transformer + foldTransformer func() transform.Transformer } // New returns a new in-memory index. @@ -117,7 +120,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err var words []*foldedWord for s.Scan() { word := s.Word() - folded, _, err := transform.String(idx.foldTransformer, word.Word) + folded, _, err := transform.String(idx.foldTransformer(), word.Word) if err != nil { return nil, fmt.Errorf("folding word %q: %w", word.Word, err) } @@ -139,7 +142,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err } for synScanner.Scan() { word := synScanner.Word() - folded, _, err := transform.String(idx.foldTransformer, word.Word) + folded, _, err := transform.String(idx.foldTransformer(), word.Word) if err != nil { return nil, fmt.Errorf("folding word %q: %w", word.Word, err) } @@ -304,7 +307,7 @@ func (idx *Idx) foldGlob(q string) (string, error) { if syntax.Special(c) { if !isSpecial { if b.Len() > 0 { - w, _, err := transform.String(idx.foldTransformer, b.String()) + w, _, err := transform.String(idx.foldTransformer(), b.String()) if err != nil { return "", fmt.Errorf("folding query %q: %w", q, err) } @@ -330,7 +333,7 @@ func (idx *Idx) foldGlob(q string) (string, error) { w := b.String() if !isSpecial { // fold the word - fw, _, err := transform.String(idx.foldTransformer, w) + fw, _, err := transform.String(idx.foldTransformer(), w) if err != nil { return "", fmt.Errorf("folding query %q: %w", q, err) } diff --git a/idx/idx_test.go b/idx/idx_test.go index 
ce84ae0..9694f26 100644 --- a/idx/idx_test.go +++ b/idx/idx_test.go @@ -22,6 +22,7 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "golang.org/x/text/cases" + "golang.org/x/text/transform" "github.com/ianlewis/go-stardict/idx" "github.com/ianlewis/go-stardict/internal/testutil" @@ -222,7 +223,9 @@ func TestIdx_Search(t *testing.T) { }, idxoffsetbits: 32, options: &idx.Options{ - Folder: cases.Fold(), + Folder: func() transform.Transformer { + return cases.Fold() + }, }, expected: []*idx.Word{ @@ -261,7 +264,9 @@ func TestIdx_Search(t *testing.T) { }, idxoffsetbits: 32, options: &idx.Options{ - Folder: cases.Fold(), + Folder: func() transform.Transformer { + return cases.Fold() + }, }, // NOTE: The returned index word is the value in the index @@ -300,7 +305,9 @@ func TestIdx_Search(t *testing.T) { }, idxoffsetbits: 32, options: &idx.Options{ - Folder: cases.Fold(), + Folder: func() transform.Transformer { + return cases.Fold() + }, }, expected: nil, @@ -331,7 +338,9 @@ func TestIdx_Search(t *testing.T) { }, idxoffsetbits: 32, options: &idx.Options{ - Folder: cases.Fold(), + Folder: func() transform.Transformer { + return cases.Fold() + }, }, expected: nil, diff --git a/stardict.go b/stardict.go index 4856d42..9af1509 100644 --- a/stardict.go +++ b/stardict.go @@ -62,13 +62,14 @@ type Stardict struct { description string sametypesequence []dict.DataType - folder transform.Transformer + folder func() transform.Transformer } // Options are options for the Stardict dictionary. type Options struct { - // Folder performs folding (e.g. case folding, whitespace folding, etc.) on dictionary entries. - Folder transform.Transformer + // Folder returns a [transform.Transformer] that performs folding (e.g. + // case folding, whitespace folding, etc.) on dictionary entries. 
+ Folder func() transform.Transformer } var ( @@ -109,23 +110,25 @@ func OpenAll(path string, options *Options) ([]*Stardict, []error) { func Open(path string, options *Options) (*Stardict, error) { if options == nil { options = &Options{ - Folder: transform.Chain( - // Unicode Normalization Form D (Canonical Decomposition. - norm.NFD, - // Perform case folding. - cases.Fold(), - // Perform whitespace folding. - &folding.WhitespaceFolder{}, - // Remove Non-spacing marks ([, ] {, }, etc.). - runes.Remove(runes.In(unicode.Mn)), - // Remove punctuation. - runes.Remove(runes.In(unicode.P)), - // Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition) - // NOTE: Case folding does not normalize the input and may not - // preserve a normal form. Canonical Decomposition is thus necessary - // to be performed a second time. - norm.NFC, - ), + Folder: func() transform.Transformer { + return transform.Chain( + // Unicode Normalization Form D (Canonical Decomposition). + norm.NFD, + // Perform case folding. + cases.Fold(), + // Perform whitespace folding. + &folding.WhitespaceFolder{}, + // Remove non-spacing marks (combining accents and other diacritics). + runes.Remove(runes.In(unicode.Mn)), + // Remove punctuation. + runes.Remove(runes.In(unicode.P)), + // Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition). + // NOTE: Case folding does not normalize the input and may not + // preserve a normal form, so Canonical Decomposition must be + // performed a second time. 
+ norm.NFC, + ) + }, } } @@ -134,7 +137,9 @@ func Open(path string, options *Options) (*Stardict, error) { idxoffsetbits: 32, } - s.folder = transform.Nop + s.folder = func() transform.Transformer { + return transform.Nop + } if options.Folder != nil { s.folder = options.Folder } diff --git a/syn/syn.go b/syn/syn.go index ec20a23..91adf0a 100644 --- a/syn/syn.go +++ b/syn/syn.go @@ -48,13 +48,16 @@ func (w *foldedWord) String() string { // Options are options for the idx data. type Options struct { - // Folder is the transformer that performs folding on index entries. - Folder transform.Transformer + // Folder returns a [transform.Transformer] that performs folding (e.g. + // case folding, whitespace folding, etc.) on index entries. + Folder func() transform.Transformer } // DefaultOptions is the default options for a Syn. var DefaultOptions = &Options{ - Folder: transform.Nop, + Folder: func() transform.Transformer { + return transform.Nop + }, } // Syn is is the synonym index. It is largely a map of synonym words to related @@ -64,7 +67,7 @@ type Syn struct { index *index.Index[*foldedWord] // foldTransformer performs folding on text. - foldTransformer transform.Transformer + foldTransformer func() transform.Transformer } // New returns a new Syn by reading the data from r. @@ -89,7 +92,7 @@ func New(r io.ReadCloser, options *Options) (*Syn, error) { var words []*foldedWord for s.Scan() { word := s.Word() - folded, _, err := transform.String(syn.foldTransformer, word.Word) + folded, _, err := transform.String(syn.foldTransformer(), word.Word) if err != nil { return nil, fmt.Errorf("folding word %q: %w", word.Word, err) } @@ -168,7 +171,7 @@ func Open(ifoPath string) (*os.File, error) { // Search performs a query of the index and returns matching words. 
func (syn *Syn) Search(query string) ([]*Word, error) { - foldedQuery, _, err := transform.String(syn.foldTransformer, query) + foldedQuery, _, err := transform.String(syn.foldTransformer(), query) if err != nil { return nil, fmt.Errorf("folding query %q: %w", query, err) }