Skip to content

Commit dca7e33

Browse files
committed
Fixes Issue #5699
1 parent 3091e9e commit dca7e33

File tree

4 files changed

+44
-6
lines changed

4 files changed

+44
-6
lines changed

enginetest/queries/charset_collation_engine.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ type CharsetCollationEngineTestQuery struct {
4343
// CharsetCollationEngineTests are used to ensure that character sets and collations have the correct behavior over the
4444
// engine. Return values should all have the `utf8mb4` encoding, as it's returning the internal encoding type.
4545
var CharsetCollationEngineTests = []CharsetCollationEngineTest{
46+
{
47+
Name: "Uppercase and lowercase collations",
48+
Queries: []CharsetCollationEngineTestQuery{
49+
{
50+
Query: "CREATE TABLE test1 (v1 VARCHAR(255) COLLATE utf16_unicode_ci, v2 VARCHAR(255) COLLATE UTF16_UNICODE_CI);",
51+
Expected: []sql.Row{{types.NewOkResult(0)}},
52+
},
53+
{
54+
Query: "CREATE TABLE test2 (v1 VARCHAR(255) CHARACTER SET utf16, v2 VARCHAR(255) CHARACTER SET UTF16);",
55+
Expected: []sql.Row{{types.NewOkResult(0)}},
56+
},
57+
},
58+
},
4659
{
4760
Name: "Insert multiple character sets",
4861
SetUpScript: []string{

enginetest/queries/charset_collation_wire.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,19 @@ type CharsetCollationWireTestQuery struct {
3636
// CharsetCollationWireTests are used to ensure that character sets and collations have the correct behavior over the
3737
// wire. Return values should all have the table encoding, as it's returning the table's encoding type.
3838
var CharsetCollationWireTests = []CharsetCollationWireTest{
39+
{
40+
Name: "Uppercase and lowercase collations",
41+
Queries: []CharsetCollationWireTestQuery{
42+
{
43+
Query: "CREATE TABLE test1 (v1 VARCHAR(255) COLLATE utf16_unicode_ci, v2 VARCHAR(255) COLLATE UTF16_UNICODE_CI);",
44+
Expected: []sql.Row{{types.NewOkResult(0)}},
45+
},
46+
{
47+
Query: "CREATE TABLE test2 (v1 VARCHAR(255) CHARACTER SET utf16, v2 VARCHAR(255) CHARACTER SET UTF16);",
48+
Expected: []sql.Row{{types.NewOkResult(0)}},
49+
},
50+
},
51+
},
3952
{
4053
Name: "Insert multiple character sets",
4154
SetUpScript: []string{

sql/charactersets.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@
1414

1515
package sql
1616

17-
import "github.com/dolthub/go-mysql-server/sql/encodings"
17+
import (
18+
"strings"
19+
20+
"github.com/dolthub/go-mysql-server/sql/encodings"
21+
)
1822

1923
// CharacterSet represents the character set of a string.
2024
type CharacterSet struct {
@@ -204,7 +208,7 @@ var SupportedCharsets = []CharacterSetID{
204208
// ParseCharacterSet takes in a string representing a CharacterSet and returns the result if a match is found, or an
205209
// error if not.
206210
func ParseCharacterSet(str string) (CharacterSetID, error) {
207-
if cs, ok := characterSetStringToID[str]; ok {
211+
if cs, ok := characterSetStringToID[strings.ToLower(str)]; ok {
208212
return cs, nil
209213
}
210214
// It is valid to recognize an empty string as the invalid charset, as some analyzer steps may temporarily use the

sql/collations.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package sql
1717
import (
1818
"fmt"
1919
"io"
20+
"strings"
2021
"unicode/utf8"
2122

2223
"github.com/cespare/xxhash"
@@ -33,9 +34,16 @@ type Collation struct {
3334
IsCompiled bool
3435
SortLength uint8
3536
PadAttribute string
36-
Sorter func(r rune) int32
37+
Sorter CollationSorter
3738
}
3839

40+
// CollationSorter is a collation's sort function. When given a rune, an integer is returned that represents that rune's
41+
// order when sorted against all other runes. That integer is referred to as a sort order. When two runes have the same
42+
// sort order, they are considered equivalent. For example, case-insensitive collations return the same sort order for
43+
// uppercase and lowercase variants of a character, while case-sensitive collations return different sort orders.
44+
// Comparing sort orders from different collations is meaningless, and therefore represents a logical error.
45+
type CollationSorter func(r rune) int32
46+
3947
// CollationsIterator iterates over every collation available, ordered by their ID (ascending).
4048
type CollationsIterator struct {
4149
idx int
@@ -758,7 +766,7 @@ func ParseCollation(characterSetStr *string, collationStr *string, binaryAttribu
758766
if collationStr == nil || len(*collationStr) == 0 {
759767
return Collation_Unspecified, nil
760768
}
761-
if collation, ok := collationStringToID[*collationStr]; ok {
769+
if collation, ok := collationStringToID[strings.ToLower(*collationStr)]; ok {
762770
if binaryAttribute {
763771
return collation.CharacterSet().BinaryCollation(), nil
764772
}
@@ -776,7 +784,7 @@ func ParseCollation(characterSetStr *string, collationStr *string, binaryAttribu
776784
}
777785
return characterSet.DefaultCollation(), nil
778786
}
779-
collation, exists := collationStringToID[*collationStr]
787+
collation, exists := collationStringToID[strings.ToLower(*collationStr)]
780788
if !exists {
781789
return Collation_Unspecified, ErrCollationUnknown.New(*collationStr)
782790
}
@@ -911,7 +919,7 @@ func (c CollationID) HashToBytes(str string) ([]byte, error) {
911919

912920
// Sorter returns this collation's sort function. As collations are a work-in-progress, it is recommended to avoid
913921
// using any collations that return a nil sort function.
914-
func (c CollationID) Sorter() func(r rune) int32 {
922+
func (c CollationID) Sorter() CollationSorter {
915923
return collationArray[c].Sorter
916924
}
917925

0 commit comments

Comments
 (0)