Skip to content

Commit 8f08330

Browse files
authored
Add support to show suggestions for inputs that already have native language characters (#53)
* Add support to show suggestions for inputs that already have native language characters
* Applied changes after self review
1 parent ad87063 commit 8f08330

File tree

6 files changed: 44 additions (+44) and 7 deletions (-7).

govarnam/constants.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ func getVSTLookupDirs() []string {
9898
}
9999
}
100100

101-
//FindVSTDir Get the VST storing directory
101+
// FindVSTDir Get the VST storing directory
102102
func FindVSTDir() (string, error) {
103103
for _, loc := range getVSTLookupDirs() {
104104
if dirExists(loc) {

govarnam/govarnam.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"sort"
1515
"strings"
1616
"time"
17+
"unicode"
1718
"unicode/utf8"
1819

1920
// sqlite3
@@ -25,6 +26,7 @@ type LangRules struct {
2526
Virama string
2627
IndicDigits bool
2728
PatternLongestLength int // Longest length of pattern in VST
29+
UnicodeBlock unicode.RangeTable
2830
}
2931

3032
// SchemeDetails of VST
@@ -247,8 +249,8 @@ func (varnam *Varnam) setDefaultConfig() {
247249
varnam.DictionaryMatchExact = false
248250

249251
varnam.LangRules.IndicDigits = false
250-
251252
varnam.LangRules.Virama, _ = varnam.getVirama()
253+
varnam.LangRules.UnicodeBlock = varnam.getUnicodeBlock()
252254

253255
if varnam.SchemeDetails.LangCode == "ml" {
254256
varnam.RegisterPatternWordPartializer(varnam.mlPatternWordPartializer)

govarnam/govarnam_ml_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ func TestMLRecentlyLearnedWords(t *testing.T) {
478478
}
479479

480480
result, err = varnam.GetRecentlyLearntWords(context.Background(), 4, len(words))
481+
checkError(err)
481482
assertEqual(t, result[0].Word, "ആലപ്പുഴ")
482483
}
483484

@@ -495,3 +496,15 @@ func TestMLGetSuggestions(t *testing.T) {
495496

496497
assertEqual(t, result[0].Word, "ആലപ്പുഴ")
497498
}
499+
500+
func TestMLNativePartialWordsInInput(t *testing.T) {
501+
varnam := getVarnamInstance("ml")
502+
503+
words := []string{"ആലപ്പുഴ", "പുസ്തകം"}
504+
for _, word := range words {
505+
varnam.Learn(word, 0)
506+
}
507+
508+
assertEqual(t, varnam.TransliterateAdvanced("ആലppu").DictionarySuggestions[0].Word, "ആലപ്പുഴ")
509+
assertEqual(t, varnam.TransliterateAdvanced("puസ്ത").DictionarySuggestions[0].Word, "പുസ്തകം")
510+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package govarnam
22

3+
import "unicode"
4+
35
/**
46
* govarnam - An Indian language transliteration library
57
* Copyright Subin Siby <mail at subinsb (.) com>, 2021
@@ -23,3 +25,15 @@ func (varnam *Varnam) mlPatternWordPartializer(sug *Suggestion) {
2325
sug.Word = sug.Word[0:len(sug.Word)-size] + "മ"
2426
}
2527
}
28+
29+
func (varnam *Varnam) getUnicodeBlock() unicode.RangeTable {
30+
switch varnam.SchemeDetails.LangCode {
31+
case "kn":
32+
return unicode.RangeTable{R16: []unicode.Range16{{0x0C80, 0x0CFF, 1}}}
33+
case "ml":
34+
return unicode.RangeTable{R16: []unicode.Range16{{0x0D00, 0x0D7F, 1}}}
35+
default:
36+
return unicode.RangeTable{}
37+
}
38+
// TODO add for all languages
39+
}

govarnam/symbol.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"fmt"
1313
"log"
1414
"strings"
15+
"unicode"
1516

1617
"github.com/mattn/go-sqlite3"
1718
)
@@ -274,10 +275,17 @@ func (varnam *Varnam) tokenizeWord(ctx context.Context, word string, matchType i
274275
matches := varnam.findLongestPatternMatchSymbols(ctx, sequence, matchType, acceptCondition)
275276

276277
if len(matches) == 0 {
277-
// No matches, add a character token
278-
// Note that we just add 1 character, and move on
279-
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
280-
results = append(results, token)
278+
if unicode.In(sequence[0], &varnam.LangRules.UnicodeBlock) {
279+
// This helps to get suggestions in inputs like "ആലppu"
280+
character := string(sequence[0])
281+
token := Token{VARNAM_TOKEN_SYMBOL, []Symbol{{Value1: character}}, i, character}
282+
results = append(results, token)
283+
} else {
284+
// No matches, add a character token
285+
// Note that we just add 1 character, and move on
286+
token := Token{VARNAM_TOKEN_CHAR, matches, i, string(sequence[:1])}
287+
results = append(results, token)
288+
}
281289

282290
i++
283291
} else {

govarnamgo/govarnamgo_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ func tearDown() {
6767

6868
func TestMain(m *testing.M) {
6969
var err error
70-
testTempDir, err = os.TempDir("", "govarnam_test")
70+
testTempDir, err = os.MkdirTemp("", "govarnamgo_test")
7171
checkError(err)
7272

7373
setUp("ml")

0 commit comments

Comments
 (0)