Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25))
- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2))
- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21))
- The index now supports whitespace and punctuation folding ([#25](https://github.com/ianlewis/go-stardict/issues/25)).
- The synonym index (.syn) file is now supported ([#2](https://github.com/ianlewis/go-stardict/issues/2)).
- `Stardict.Search` and `Idx.Search` now support queries in glob format ([#21](https://github.com/ianlewis/go-stardict/issues/21)).

### Changed

- The minimum supported Go version is now 1.23.
- `stardict.Open` and `stardict.OpenAll` now take an `options` argument which allows for specifying options for opening dictionaries ([#87](https://github.com/ianlewis/go-stardict/issues/87)).
- `stardict.idx.Options.Folder` is now a constructor `func() transform.Transformer` rather than a static `golang.org/x/text/transform.Transformer` value ([#87](https://github.com/ianlewis/go-stardict/issues/87)).

## [0.1.0] - 2024-11-04

Expand Down
19 changes: 11 additions & 8 deletions idx/idx.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,19 @@ func (w *foldedWord) String() string {

// Options are options for the idx data.
type Options struct {
// Folder is the transformer that performs folding on index entries.
Folder transform.Transformer
// Folder returns a [transform.Transformer] that performs folding (e.g.
// case folding, whitespace folding, etc.) on index entries.
Folder func() transform.Transformer

// ScannerOptions are the options to use when reading the .idx file.
ScannerOptions *ScannerOptions
}

// DefaultOptions is the default options for an Idx.
var DefaultOptions = &Options{
Folder: transform.Nop,
Folder: func() transform.Transformer {
return transform.Nop
},
ScannerOptions: &ScannerOptions{
OffsetBits: 32,
},
Expand All @@ -87,7 +90,7 @@ type Idx struct {
index *index.Index[*foldedWord]

// foldTransformer performs folding on text.
foldTransformer transform.Transformer
foldTransformer func() transform.Transformer
}

// New returns a new in-memory index.
Expand Down Expand Up @@ -117,7 +120,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err
var words []*foldedWord
for s.Scan() {
word := s.Word()
folded, _, err := transform.String(idx.foldTransformer, word.Word)
folded, _, err := transform.String(idx.foldTransformer(), word.Word)
if err != nil {
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
}
Expand All @@ -139,7 +142,7 @@ func NewWithSyn(idxReader, synReader io.ReadCloser, options *Options) (*Idx, err
}
for synScanner.Scan() {
word := synScanner.Word()
folded, _, err := transform.String(idx.foldTransformer, word.Word)
folded, _, err := transform.String(idx.foldTransformer(), word.Word)
if err != nil {
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
}
Expand Down Expand Up @@ -304,7 +307,7 @@ func (idx *Idx) foldGlob(q string) (string, error) {
if syntax.Special(c) {
if !isSpecial {
if b.Len() > 0 {
w, _, err := transform.String(idx.foldTransformer, b.String())
w, _, err := transform.String(idx.foldTransformer(), b.String())
if err != nil {
return "", fmt.Errorf("folding query %q: %w", q, err)
}
Expand All @@ -330,7 +333,7 @@ func (idx *Idx) foldGlob(q string) (string, error) {
w := b.String()
if !isSpecial {
// fold the word
fw, _, err := transform.String(idx.foldTransformer, w)
fw, _, err := transform.String(idx.foldTransformer(), w)
if err != nil {
return "", fmt.Errorf("folding query %q: %w", q, err)
}
Expand Down
17 changes: 13 additions & 4 deletions idx/idx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"golang.org/x/text/cases"
"golang.org/x/text/transform"

"github.com/ianlewis/go-stardict/idx"
"github.com/ianlewis/go-stardict/internal/testutil"
Expand Down Expand Up @@ -222,7 +223,9 @@ func TestIdx_Search(t *testing.T) {
},
idxoffsetbits: 32,
options: &idx.Options{
Folder: cases.Fold(),
Folder: func() transform.Transformer {
return cases.Fold()
},
},

expected: []*idx.Word{
Expand Down Expand Up @@ -261,7 +264,9 @@ func TestIdx_Search(t *testing.T) {
},
idxoffsetbits: 32,
options: &idx.Options{
Folder: cases.Fold(),
Folder: func() transform.Transformer {
return cases.Fold()
},
},

// NOTE: The returned index word is the value in the index
Expand Down Expand Up @@ -300,7 +305,9 @@ func TestIdx_Search(t *testing.T) {
},
idxoffsetbits: 32,
options: &idx.Options{
Folder: cases.Fold(),
Folder: func() transform.Transformer {
return cases.Fold()
},
},

expected: nil,
Expand Down Expand Up @@ -331,7 +338,9 @@ func TestIdx_Search(t *testing.T) {
},
idxoffsetbits: 32,
options: &idx.Options{
Folder: cases.Fold(),
Folder: func() transform.Transformer {
return cases.Fold()
},
},

expected: nil,
Expand Down
47 changes: 26 additions & 21 deletions stardict.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,14 @@ type Stardict struct {
description string
sametypesequence []dict.DataType

folder transform.Transformer
folder func() transform.Transformer
}

// Options are options for the Stardict dictionary.
type Options struct {
// Folder performs folding (e.g. case folding, whitespace folding, etc.) on dictionary entries.
Folder transform.Transformer
// Folder returns a [transform.Transformer] that performs folding (e.g.
// case folding, whitespace folding, etc.) on dictionary entries.
Folder func() transform.Transformer
}

var (
Expand Down Expand Up @@ -109,23 +110,25 @@ func OpenAll(path string, options *Options) ([]*Stardict, []error) {
func Open(path string, options *Options) (*Stardict, error) {
if options == nil {
options = &Options{
Folder: transform.Chain(
// Unicode Normalization Form D (Canonical Decomposition).
norm.NFD,
// Perform case folding.
cases.Fold(),
// Perform whitespace folding.
&folding.WhitespaceFolder{},
// Remove non-spacing marks (combining diacritics such as accents).
runes.Remove(runes.In(unicode.Mn)),
// Remove punctuation.
runes.Remove(runes.In(unicode.P)),
// Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition)
// NOTE: Case folding does not normalize the input and may not
// preserve a normal form. Canonical Decomposition must therefore
// be performed a second time.
norm.NFC,
),
Folder: func() transform.Transformer {
return transform.Chain(
// Unicode Normalization Form D (Canonical Decomposition).
norm.NFD,
// Perform case folding.
cases.Fold(),
// Perform whitespace folding.
&folding.WhitespaceFolder{},
// Remove non-spacing marks (combining diacritics such as accents).
runes.Remove(runes.In(unicode.Mn)),
// Remove punctuation.
runes.Remove(runes.In(unicode.P)),
// Unicode Normalization Form C (Canonical Decomposition, followed by Canonical Composition)
// NOTE: Case folding does not normalize the input and may not
// preserve a normal form. Canonical Decomposition must therefore
// be performed a second time.
norm.NFC,
)
},
}
}

Expand All @@ -134,7 +137,9 @@ func Open(path string, options *Options) (*Stardict, error) {
idxoffsetbits: 32,
}

s.folder = transform.Nop
s.folder = func() transform.Transformer {
return transform.Nop
}
if options.Folder != nil {
s.folder = options.Folder
}
Expand Down
15 changes: 9 additions & 6 deletions syn/syn.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,16 @@ func (w *foldedWord) String() string {

// Options are options for the syn data.
type Options struct {
// Folder is the transformer that performs folding on index entries.
Folder transform.Transformer
// Folder returns a [transform.Transformer] that performs folding (e.g.
// case folding, whitespace folding, etc.) on index entries.
Folder func() transform.Transformer
}

// DefaultOptions is the default options for a Syn.
var DefaultOptions = &Options{
Folder: transform.Nop,
Folder: func() transform.Transformer {
return transform.Nop
},
}

// Syn is the synonym index. It is largely a map of synonym words to related
Expand All @@ -64,7 +67,7 @@ type Syn struct {
index *index.Index[*foldedWord]

// foldTransformer performs folding on text.
foldTransformer transform.Transformer
foldTransformer func() transform.Transformer
}

// New returns a new Syn by reading the data from r.
Expand All @@ -89,7 +92,7 @@ func New(r io.ReadCloser, options *Options) (*Syn, error) {
var words []*foldedWord
for s.Scan() {
word := s.Word()
folded, _, err := transform.String(syn.foldTransformer, word.Word)
folded, _, err := transform.String(syn.foldTransformer(), word.Word)
if err != nil {
return nil, fmt.Errorf("folding word %q: %w", word.Word, err)
}
Expand Down Expand Up @@ -168,7 +171,7 @@ func Open(ifoPath string) (*os.File, error) {

// Search performs a query of the index and returns matching words.
func (syn *Syn) Search(query string) ([]*Word, error) {
foldedQuery, _, err := transform.String(syn.foldTransformer, query)
foldedQuery, _, err := transform.String(syn.foldTransformer(), query)
if err != nil {
return nil, fmt.Errorf("folding query %q: %w", query, err)
}
Expand Down
Loading