Skip to content

Commit 13fb928

Browse files
committed
add improved collation support (broken)
1 parent 411cb38 commit 13fb928

File tree

7 files changed

+314
-11
lines changed

7 files changed

+314
-11
lines changed

README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,22 @@ Default: none
143143

144144
Sets the charset used for client-server interaction (`"SET NAMES <value>"`). If multiple charsets are set (separated by a comma), the next charset in the list is tried if setting the previous one fails. This enables support for `utf8mb4` ([introduced in MySQL 5.5.3](http://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html)) with fallback to `utf8` for older servers (`charset=utf8mb4,utf8`).
145145

146+
*Please note:*
147+
148+
Usage of the `charset` parameter is discouraged because it issues queries.
149+
Unless you need the fallback behavior, please use `collation` instead.
150+
151+
##### `collation`
152+
153+
```
154+
Type: string
155+
Valid Values: <name>
156+
Default: utf8_general_ci
157+
```
158+
159+
Sets the collation used for client-server interaction on connection. In contrast to `charset`, `collation` does not issue queries. If the specified collation is unavailable on the target server, the connection will fail.
160+
161+
A list of valid collations for a server is retrievable with `SHOW COLLATION`.
146162

147163
##### `clientFoundRows`
148164

collations.go

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
// Go MySQL Driver - A MySQL-Driver for Go's database/sql package
2+
//
3+
// Copyright 2014 The Go-MySQL-Driver Authors. All rights reserved.
4+
//
5+
// This Source Code Form is subject to the terms of the Mozilla Public
6+
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
7+
// You can obtain one at http://mozilla.org/MPL/2.0/.
8+
9+
package mysql
10+
11+
const collationUtf8GeneralCi = 33
12+
13+
const defaultCollation byte = collationUtf8GeneralCi
14+
15+
// A list of available collations and associated charsets to update this map
16+
// is available in MySQL with the query
17+
// SELECT COLLATION_NAME, ID FROM information_schema.COLLATIONS
18+
var collations = map[string]byte{
19+
"big5_chinese_ci": 1,
20+
"latin2_czech_cs": 2,
21+
"dec8_swedish_ci": 3,
22+
"cp850_general_ci": 4,
23+
"latin1_german1_ci": 5,
24+
"hp8_english_ci": 6,
25+
"koi8r_general_ci": 7,
26+
"latin1_swedish_ci": 8,
27+
"latin2_general_ci": 9,
28+
"swe7_swedish_ci": 10,
29+
"ascii_general_ci": 11,
30+
"ujis_japanese_ci": 12,
31+
"sjis_japanese_ci": 13,
32+
"cp1251_bulgarian_ci": 14,
33+
"latin1_danish_ci": 15,
34+
"hebrew_general_ci": 16,
35+
"tis620_thai_ci": 18,
36+
"euckr_korean_ci": 19,
37+
"latin7_estonian_cs": 20,
38+
"latin2_hungarian_ci": 21,
39+
"koi8u_general_ci": 22,
40+
"cp1251_ukrainian_ci": 23,
41+
"gb2312_chinese_ci": 24,
42+
"greek_general_ci": 25,
43+
"cp1250_general_ci": 26,
44+
"latin2_croatian_ci": 27,
45+
"gbk_chinese_ci": 28,
46+
"cp1257_lithuanian_ci": 29,
47+
"latin5_turkish_ci": 30,
48+
"latin1_german2_ci": 31,
49+
"armscii8_general_ci": 32,
50+
"utf8_general_ci": collationUtf8GeneralCi,
51+
"cp1250_czech_cs": 34,
52+
"ucs2_general_ci": 35,
53+
"cp866_general_ci": 36,
54+
"keybcs2_general_ci": 37,
55+
"macce_general_ci": 38,
56+
"macroman_general_ci": 39,
57+
"cp852_general_ci": 40,
58+
"latin7_general_ci": 41,
59+
"latin7_general_cs": 42,
60+
"macce_bin": 43,
61+
"cp1250_croatian_ci": 44,
62+
"utf8mb4_general_ci": 45,
63+
"utf8mb4_bin": 46,
64+
"latin1_bin": 47,
65+
"latin1_general_ci": 48,
66+
"latin1_general_cs": 49,
67+
"cp1251_bin": 50,
68+
"cp1251_general_ci": 51,
69+
"cp1251_general_cs": 52,
70+
"macroman_bin": 53,
71+
"utf16_general_ci": 54,
72+
"utf16_bin": 55,
73+
"utf16le_general_ci": 56,
74+
"cp1256_general_ci": 57,
75+
"cp1257_bin": 58,
76+
"cp1257_general_ci": 59,
77+
"utf32_general_ci": 60,
78+
"utf32_bin": 61,
79+
"utf16le_bin": 62,
80+
"binary": 63,
81+
"armscii8_bin": 64,
82+
"ascii_bin": 65,
83+
"cp1250_bin": 66,
84+
"cp1256_bin": 67,
85+
"cp866_bin": 68,
86+
"dec8_bin": 69,
87+
"greek_bin": 70,
88+
"hebrew_bin": 71,
89+
"hp8_bin": 72,
90+
"keybcs2_bin": 73,
91+
"koi8r_bin": 74,
92+
"koi8u_bin": 75,
93+
"latin2_bin": 77,
94+
"latin5_bin": 78,
95+
"latin7_bin": 79,
96+
"cp850_bin": 80,
97+
"cp852_bin": 81,
98+
"swe7_bin": 82,
99+
"utf8_bin": 83,
100+
"big5_bin": 84,
101+
"euckr_bin": 85,
102+
"gb2312_bin": 86,
103+
"gbk_bin": 87,
104+
"sjis_bin": 88,
105+
"tis620_bin": 89,
106+
"ucs2_bin": 90,
107+
"ujis_bin": 91,
108+
"geostd8_general_ci": 92,
109+
"geostd8_bin": 93,
110+
"latin1_spanish_ci": 94,
111+
"cp932_japanese_ci": 95,
112+
"cp932_bin": 96,
113+
"eucjpms_japanese_ci": 97,
114+
"eucjpms_bin": 98,
115+
"cp1250_polish_ci": 99,
116+
"utf16_unicode_ci": 101,
117+
"utf16_icelandic_ci": 102,
118+
"utf16_latvian_ci": 103,
119+
"utf16_romanian_ci": 104,
120+
"utf16_slovenian_ci": 105,
121+
"utf16_polish_ci": 106,
122+
"utf16_estonian_ci": 107,
123+
"utf16_spanish_ci": 108,
124+
"utf16_swedish_ci": 109,
125+
"utf16_turkish_ci": 110,
126+
"utf16_czech_ci": 111,
127+
"utf16_danish_ci": 112,
128+
"utf16_lithuanian_ci": 113,
129+
"utf16_slovak_ci": 114,
130+
"utf16_spanish2_ci": 115,
131+
"utf16_roman_ci": 116,
132+
"utf16_persian_ci": 117,
133+
"utf16_esperanto_ci": 118,
134+
"utf16_hungarian_ci": 119,
135+
"utf16_sinhala_ci": 120,
136+
"utf16_german2_ci": 121,
137+
"utf16_croatian_ci": 122,
138+
"utf16_unicode_520_ci": 123,
139+
"utf16_vietnamese_ci": 124,
140+
"ucs2_unicode_ci": 128,
141+
"ucs2_icelandic_ci": 129,
142+
"ucs2_latvian_ci": 130,
143+
"ucs2_romanian_ci": 131,
144+
"ucs2_slovenian_ci": 132,
145+
"ucs2_polish_ci": 133,
146+
"ucs2_estonian_ci": 134,
147+
"ucs2_spanish_ci": 135,
148+
"ucs2_swedish_ci": 136,
149+
"ucs2_turkish_ci": 137,
150+
"ucs2_czech_ci": 138,
151+
"ucs2_danish_ci": 139,
152+
"ucs2_lithuanian_ci": 140,
153+
"ucs2_slovak_ci": 141,
154+
"ucs2_spanish2_ci": 142,
155+
"ucs2_roman_ci": 143,
156+
"ucs2_persian_ci": 144,
157+
"ucs2_esperanto_ci": 145,
158+
"ucs2_hungarian_ci": 146,
159+
"ucs2_sinhala_ci": 147,
160+
"ucs2_german2_ci": 148,
161+
"ucs2_croatian_ci": 149,
162+
"ucs2_unicode_520_ci": 150,
163+
"ucs2_vietnamese_ci": 151,
164+
"ucs2_general_mysql500_ci": 159,
165+
"utf32_unicode_ci": 160,
166+
"utf32_icelandic_ci": 161,
167+
"utf32_latvian_ci": 162,
168+
"utf32_romanian_ci": 163,
169+
"utf32_slovenian_ci": 164,
170+
"utf32_polish_ci": 165,
171+
"utf32_estonian_ci": 166,
172+
"utf32_spanish_ci": 167,
173+
"utf32_swedish_ci": 168,
174+
"utf32_turkish_ci": 169,
175+
"utf32_czech_ci": 170,
176+
"utf32_danish_ci": 171,
177+
"utf32_lithuanian_ci": 172,
178+
"utf32_slovak_ci": 173,
179+
"utf32_spanish2_ci": 174,
180+
"utf32_roman_ci": 175,
181+
"utf32_persian_ci": 176,
182+
"utf32_esperanto_ci": 177,
183+
"utf32_hungarian_ci": 178,
184+
"utf32_sinhala_ci": 179,
185+
"utf32_german2_ci": 180,
186+
"utf32_croatian_ci": 181,
187+
"utf32_unicode_520_ci": 182,
188+
"utf32_vietnamese_ci": 183,
189+
"utf8_unicode_ci": 192,
190+
"utf8_icelandic_ci": 193,
191+
"utf8_latvian_ci": 194,
192+
"utf8_romanian_ci": 195,
193+
"utf8_slovenian_ci": 196,
194+
"utf8_polish_ci": 197,
195+
"utf8_estonian_ci": 198,
196+
"utf8_spanish_ci": 199,
197+
"utf8_swedish_ci": 200,
198+
"utf8_turkish_ci": 201,
199+
"utf8_czech_ci": 202,
200+
"utf8_danish_ci": 203,
201+
"utf8_lithuanian_ci": 204,
202+
"utf8_slovak_ci": 205,
203+
"utf8_spanish2_ci": 206,
204+
"utf8_roman_ci": 207,
205+
"utf8_persian_ci": 208,
206+
"utf8_esperanto_ci": 209,
207+
"utf8_hungarian_ci": 210,
208+
"utf8_sinhala_ci": 211,
209+
"utf8_german2_ci": 212,
210+
"utf8_croatian_ci": 213,
211+
"utf8_unicode_520_ci": 214,
212+
"utf8_vietnamese_ci": 215,
213+
"utf8_general_mysql500_ci": 223,
214+
"utf8mb4_unicode_ci": 224,
215+
"utf8mb4_icelandic_ci": 225,
216+
"utf8mb4_latvian_ci": 226,
217+
"utf8mb4_romanian_ci": 227,
218+
"utf8mb4_slovenian_ci": 228,
219+
"utf8mb4_polish_ci": 229,
220+
"utf8mb4_estonian_ci": 230,
221+
"utf8mb4_spanish_ci": 231,
222+
"utf8mb4_swedish_ci": 232,
223+
"utf8mb4_turkish_ci": 233,
224+
"utf8mb4_czech_ci": 234,
225+
"utf8mb4_danish_ci": 235,
226+
"utf8mb4_lithuanian_ci": 236,
227+
"utf8mb4_slovak_ci": 237,
228+
"utf8mb4_spanish2_ci": 238,
229+
"utf8mb4_roman_ci": 239,
230+
"utf8mb4_persian_ci": 240,
231+
"utf8mb4_esperanto_ci": 241,
232+
"utf8mb4_hungarian_ci": 242,
233+
"utf8mb4_sinhala_ci": 243,
234+
"utf8mb4_german2_ci": 244,
235+
"utf8mb4_croatian_ci": 245,
236+
"utf8mb4_unicode_520_ci": 246,
237+
"utf8mb4_vietnamese_ci": 247,
238+
}

connection.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type mysqlConn struct {
2727
maxWriteSize int
2828
flags clientFlag
2929
sequence uint8
30+
collation byte
3031
parseTime bool
3132
strict bool
3233
}
@@ -50,6 +51,20 @@ type config struct {
5051
func (mc *mysqlConn) handleParams() (err error) {
5152
for param, val := range mc.cfg.params {
5253
switch param {
54+
// Collation
55+
case "collation":
56+
collation, ok := collations[val]
57+
if !ok {
58+
// Note possibility for false negatives:
59+
// could be caused although the collation is valid
60+
// if the collations map does not contain entries
61+
// the server supports.
62+
err = errors.New("unknown collation")
63+
return
64+
}
65+
mc.collation = collation
66+
break
67+
5368
// Charset
5469
case "charset":
5570
charsets := strings.Split(val, ",")

const.go

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,3 @@ const (
130130
flagUnknown3
131131
flagUnknown4
132132
)
133-
134-
const (
135-
collation_ascii_general_ci byte = 11
136-
collation_utf8_general_ci byte = 33
137-
collation_utf8mb4_general_ci byte = 45
138-
collation_utf8mb4_bin byte = 46
139-
collation_latin1_general_ci byte = 48
140-
collation_binary byte = 63
141-
collation_utf8mb4_unicode_ci byte = 224
142-
)

driver.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ func (d *MySQLDriver) Open(dsn string) (driver.Conn, error) {
4040
mc := &mysqlConn{
4141
maxPacketAllowed: maxPacketSize,
4242
maxWriteSize: maxPacketSize - 1,
43+
collation: defaultCollation,
4344
}
4445
mc.cfg, err = parseDSN(dsn)
4546
if err != nil {

driver_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,49 @@ func TestFailingCharset(t *testing.T) {
938938
})
939939
}
940940

941+
func TestCollation(t *testing.T) {
942+
if !available {
943+
t.Skipf("MySQL-Server not running on %s", netAddr)
944+
}
945+
946+
defaultCollation := "utf8_general_ci"
947+
tests := []string{
948+
"", // do not set
949+
defaultCollation, // driver default
950+
"latin1_general_ci",
951+
"binary",
952+
"utf8mb4_general_ci",
953+
}
954+
cdsn := dsn
955+
for _, collation := range tests {
956+
var expected string
957+
if collation != "" {
958+
cdsn += "&collation=" + collation
959+
expected = collation
960+
} else {
961+
expected = defaultCollation
962+
}
963+
runTests(t, cdsn, func(dbt *DBTest) {
964+
rows := dbt.mustQuery("SELECT @@collation_connection")
965+
defer rows.Close()
966+
967+
if !rows.Next() {
968+
dbt.Fatalf("Error getting connection collation: %s", rows.Err())
969+
}
970+
971+
var got string
972+
err := rows.Scan(&got)
973+
if err != nil {
974+
dbt.Fatal(err)
975+
}
976+
977+
if got != expected {
978+
dbt.Fatalf("Expected connection collation %s but got %s", expected, got)
979+
}
980+
})
981+
}
982+
}
983+
941984
func TestRawBytesResultExceedsBuffer(t *testing.T) {
942985
runTests(t, dsn, func(dbt *DBTest) {
943986
// defaultBufSize from buffer.go

packets.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ func (mc *mysqlConn) writeAuthPacket(cipher []byte) error {
257257
data[11] = 0x00
258258

259259
// Charset [1 byte]
260-
data[12] = collation_utf8_general_ci
260+
data[12] = mc.collation
261261

262262
// SSL Connection Request Packet
263263
// http://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::SSLRequest

0 commit comments

Comments
 (0)