Skip to content

Commit ff0cb6f

Browse files
committed
Unicode tests, fixes.
1 parent 72db90e commit ff0cb6f

File tree

2 files changed

+135
-16
lines changed

2 files changed

+135
-16
lines changed

ext/unicode/unicode.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
1-
// Package unicode provides a replacement for the SQLite ICU extension.
1+
// Package unicode provides an alternative to the SQLite ICU extension.
22
//
33
// Provides Unicode aware:
44
// - upper and lower functions,
5-
// - LIKE and REGEX operators,
5+
// - LIKE and REGEXP operators,
66
// - collation sequences.
7+
//
8+
// This package is not 100% compatible with the ICU extension:
9+
// - upper and lower use [strings.ToUpper], [strings.ToLower] and [cases];
10+
// - the LIKE operator follows [strings.EqualFold] rules;
11+
// - the REGEXP operator uses Go [regexp/syntax];
12+
// - collation sequences use [collate].
13+
//
14+
// Expect subtle differences, for example in the handling of Turkish case folding.
715
package unicode
816

917
import (
@@ -20,7 +28,7 @@ import (
2028
)
2129

2230
// Register registers Unicode aware functions for a database connection.
23-
func Register(db sqlite3.Conn) {
31+
func Register(db *sqlite3.Conn) {
2432
flags := sqlite3.DETERMINISTIC | sqlite3.INNOCUOUS
2533

2634
db.CreateFunction("like", 2, flags, like)
@@ -58,7 +66,7 @@ func upper(ctx sqlite3.Context, arg ...sqlite3.Value) {
5866
}
5967
cs, ok := ctx.GetAuxData(1).(cases.Caser)
6068
if !ok {
61-
t, err := language.Parse(arg[0].Text())
69+
t, err := language.Parse(arg[1].Text())
6270
if err != nil {
6371
ctx.ResultError(err)
6472
return
@@ -77,7 +85,7 @@ func lower(ctx sqlite3.Context, arg ...sqlite3.Value) {
7785
}
7886
cs, ok := ctx.GetAuxData(1).(cases.Caser)
7987
if !ok {
80-
t, err := language.Parse(arg[0].Text())
88+
t, err := language.Parse(arg[1].Text())
8189
if err != nil {
8290
ctx.ResultError(err)
8391
return
@@ -137,7 +145,8 @@ func like2regex(pattern string, escape rune) string {
137145
var re strings.Builder
138146
start := 0
139147
literal := false
140-
re.WriteString(`(?is)`) // case insensitive, . matches any character
148+
re.Grow(len(pattern) + 10)
149+
re.WriteString(`(?is)\A`) // case insensitive, . matches any character
141150
for i, r := range pattern {
142151
if start < 0 {
143152
start = i
@@ -164,5 +173,6 @@ func like2regex(pattern string, escape rune) string {
164173
if start >= 0 {
165174
re.WriteString(regexp.QuoteMeta(pattern[start:]))
166175
}
176+
re.WriteString(`\z`)
167177
return re.String()
168178
}

ext/unicode/unicode_test.go

Lines changed: 119 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,134 @@
11
package unicode
22

3-
import "testing"
3+
import (
4+
"errors"
5+
"testing"
6+
7+
"github.com/ncruces/go-sqlite3"
8+
_ "github.com/ncruces/go-sqlite3/embed"
9+
)
10+
11+
func TestRegister(t *testing.T) {
12+
t.Parallel()
13+
14+
db, err := sqlite3.Open(":memory:")
15+
if err != nil {
16+
t.Fatal(err)
17+
}
18+
defer db.Close()
19+
20+
exec := func(fn string) string {
21+
stmt, _, err := db.Prepare(`SELECT ` + fn)
22+
if err != nil {
23+
t.Fatal(err)
24+
}
25+
defer stmt.Close()
26+
27+
if stmt.Step() {
28+
return stmt.ColumnText(0)
29+
}
30+
t.Fatal(stmt.Err())
31+
return ""
32+
}
33+
34+
Register(db)
35+
36+
tests := []struct {
37+
test string
38+
want string
39+
}{
40+
{`upper('hello')`, "HELLO"},
41+
{`lower('HELLO')`, "hello"},
42+
{`upper('привет')`, "ПРИВЕТ"},
43+
{`lower('ПРИВЕТ')`, "привет"},
44+
{`upper('istanbul')`, "ISTANBUL"},
45+
{`upper('istanbul', 'tr-TR')`, "İSTANBUL"},
46+
{`lower('Dünyanın İlk Borsası', 'tr-TR')`, "dünyanın ilk borsası"},
47+
{`upper('Dünyanın İlk Borsası', 'tr-TR')`, "DÜNYANIN İLK BORSASI"},
48+
{`'Hello' REGEXP 'ell'`, "1"},
49+
{`'Hello' REGEXP 'el.'`, "1"},
50+
{`'Hello' LIKE 'hel_'`, "0"},
51+
{`'Hello' LIKE 'hel%'`, "1"},
52+
{`'Hello' LIKE 'h_llo'`, "1"},
53+
{`'Hello' LIKE 'hello'`, "1"},
54+
{`'Привет' LIKE 'ПРИВЕТ'`, "1"},
55+
{`'100%' LIKE '100|%' ESCAPE '|'`, "1"},
56+
}
57+
58+
for _, tt := range tests {
59+
t.Run(tt.test, func(t *testing.T) {
60+
if got := exec(tt.test); got != tt.want {
61+
t.Errorf("exec(%q) = %q, want %q", tt.test, got, tt.want)
62+
}
63+
})
64+
}
65+
}
66+
67+
func TestRegister_error(t *testing.T) {
68+
t.Parallel()
69+
70+
db, err := sqlite3.Open(":memory:")
71+
if err != nil {
72+
t.Fatal(err)
73+
}
74+
defer db.Close()
75+
76+
Register(db)
77+
78+
err = db.Exec(`SELECT upper('hello', 'enUS')`)
79+
if err == nil {
80+
t.Error("want error")
81+
}
82+
if !errors.Is(err, sqlite3.ERROR) {
83+
t.Errorf("got %v, want sqlite3.ERROR", err)
84+
}
85+
86+
err = db.Exec(`SELECT lower('hello', 'enUS')`)
87+
if err == nil {
88+
t.Error("want error")
89+
}
90+
if !errors.Is(err, sqlite3.ERROR) {
91+
t.Errorf("got %v, want sqlite3.ERROR", err)
92+
}
93+
94+
err = db.Exec(`SELECT 'hello' REGEXP '\'`)
95+
if err == nil {
96+
t.Error("want error")
97+
}
98+
if !errors.Is(err, sqlite3.ERROR) {
99+
t.Errorf("got %v, want sqlite3.ERROR", err)
100+
}
101+
102+
err = db.Exec(`SELECT 'hello' LIKE 'HELLO' ESCAPE '\\' `)
103+
if err == nil {
104+
t.Error("want error")
105+
}
106+
if !errors.Is(err, sqlite3.ERROR) {
107+
t.Errorf("got %v, want sqlite3.ERROR", err)
108+
}
109+
}
4110

5111
func Test_like2regex(t *testing.T) {
112+
const prefix = `(?is)\A`
113+
const sufix = `\z`
6114
tests := []struct {
7115
pattern string
8116
escape rune
9117
want string
10118
}{
11-
{`a`, -1, `(?is)a`},
12-
{`a.`, -1, `(?is)a\.`},
13-
{`a%`, -1, `(?is)a.*`},
14-
{`a\`, -1, `(?is)a\\`},
15-
{`a_b`, -1, `(?is)a.b`},
16-
{`a|b`, '|', `(?is)ab`},
17-
{`a|_`, '|', `(?is)a_`},
119+
{`a`, -1, `a`},
120+
{`a.`, -1, `a\.`},
121+
{`a%`, -1, `a.*`},
122+
{`a\`, -1, `a\\`},
123+
{`a_b`, -1, `a.b`},
124+
{`a|b`, '|', `ab`},
125+
{`a|_`, '|', `a_`},
18126
}
19127
for _, tt := range tests {
20128
t.Run(tt.pattern, func(t *testing.T) {
21-
if got := like2regex(tt.pattern, tt.escape); got != tt.want {
22-
t.Errorf("like2regex() = %v, want %v", got, tt.want)
129+
want := prefix + tt.want + sufix
130+
if got := like2regex(tt.pattern, tt.escape); got != want {
131+
t.Errorf("like2regex() = %q, want %q", got, want)
23132
}
24133
})
25134
}

0 commit comments

Comments
 (0)