Skip to content

Commit 814c49a

Browse files
committed
add mysql charset id -> encoding name mapping
1 parent 8de770e commit 814c49a

File tree

2 files changed

+78
-0
lines changed

2 files changed

+78
-0
lines changed

lib/constants/charset_encodings.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// see tools/generate-charset-mapping.js
2+
// basically the result of a "SHOW COLLATION" query
3+
4+
// Lookup table: the array index is a MySQL collation id (the `Id` column of
// "SHOW COLLATION"), the value is the encoding name produced for that
// collation's charset by tools/generate-charset-mapping.js.
// `null` marks an index for which the generator stored no entry (a hole in
// its array, serialized as null by JSON.stringify).
// NOTE(review): several names listed in the generator's TODO (e.g. "dec8",
// "utf32") appear here although iconv-lite did not support them at generation
// time; callers presumably need to check encoding support themselves - confirm.
module.exports = [
5+
null,"big5","latin2","dec8","cp850","latin1","hp8","koi8r",
6+
"latin1","latin2","swe7","ascii","ujis","sjis","cp1251","latin1",
7+
"hebrew",null,"tis620","euckr","latin7","latin2","koi8u","cp1251",
8+
"gb2312","greek","cp1250","latin2","gbk","cp1257","latin5","latin1",
9+
"armscii8","cesu8","cp1250","ucs2","cp866","keybcs2","macintosh","macroman",
10+
"cp852","latin7","latin7","macintosh","cp1250","cesu8","cesu8","latin1",
11+
"latin1","latin1","cp1251","cp1251","cp1251","macroman","utf16","utf16",
12+
"utf16le","cp1256","cp1257","cp1257","utf32","utf32","utf16le","binary",
13+
"armscii8","ascii","cp1250","cp1256","cp866","dec8","greek","hebrew",
14+
"hp8","keybcs2","koi8r","koi8u",null,"latin2","latin5","latin7",
15+
"cp850","cp852","swe7","cesu8","big5","euckr","gb2312","gbk",
16+
"sjis","tis620","ucs2","ujis","geostd8","geostd8","latin1","cp932",
17+
"cp932","eucjp","eucjp","cp1250",null,"utf16","utf16","utf16",
18+
"utf16","utf16","utf16","utf16","utf16","utf16","utf16","utf16",
19+
"utf16","utf16","utf16","utf16","utf16","utf16","utf16","utf16",
20+
"utf16","utf16","utf16","utf16","utf16",null,null,null,
21+
"ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2",
22+
"ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2",
23+
"ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2","ucs2",
24+
null,null,null,null,null,null,null,"ucs2",
25+
"utf32","utf32","utf32","utf32","utf32","utf32","utf32","utf32",
26+
"utf32","utf32","utf32","utf32","utf32","utf32","utf32","utf32",
27+
"utf32","utf32","utf32","utf32","utf32","utf32","utf32","utf32",
28+
null,null,null,null,null,null,null,null,
29+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
30+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
31+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
32+
null,null,null,null,null,null,null,"cesu8",
33+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
34+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
35+
"cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8","cesu8",
36+
"gb18030","gb18030","gb18030"
37+
];

tools/generate-charset-mapping.js

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Prints, as JSON on stdout, an array that maps every MySQL collation id
// (the `Id` column of "SHOW COLLATION") to the iconv-lite encoding name used
// for that collation's charset. Ids the server does not report stay as holes
// in the array, which JSON.stringify serializes as null.
// Encoding names iconv-lite does not know are still written to the array;
// they are additionally listed on stderr so stdout remains valid JSON.

const mysql = require('../index.js');
const iconv = require('iconv-lite');

// Connection options are intentionally left empty here.
const conn = mysql.createConnection({
});

// TODO: add encodings missing in iconv-lite
//   "dec8","hp8","swe7","ujis","keybcs2","utf32","geostd8"
//
// see also https://github.com/ashtuchkin/iconv-lite/issues/125
//          https://en.wikipedia.org/wiki/Kamenick%C3%BD_encoding
//          https://github.com/twitter/mysql/tree/master/sql/share/charsets

// MySQL charset name -> iconv-lite encoding name, for charsets whose MySQL
// name is not the name iconv-lite uses.
const mysql2iconv = {
  utf8: 'cesu8',
  utf8mb4: 'cesu8',
  // need to check that this is correct mapping
  macce: 'macintosh', // Mac Central European
  eucjpms: 'eucjp' // UJIS for Windows Japanese
};

// Sparse array indexed by collation id.
const charsets = [];
// Encoding names that iconv-lite reports as unsupported.
const missing = new Set();

conn.query('show collation', function(err, res) {
  if (err) {
    // Without this guard a failed query crashed on `res.forEach` with an
    // unrelated TypeError; report the real error and signal failure instead.
    console.error(err);
    process.exitCode = 1;
    return;
  }

  res.forEach(r => {
    const charset = r.Charset;
    // if there is a manual mapping, it overrides the MySQL charset name
    const iconvCharset = mysql2iconv[charset] || charset;
    if (!iconv.encodingExists(iconvCharset)) {
      missing.add(iconvCharset);
    }
    charsets[r.Id] = iconvCharset;
  });

  if (missing.size > 0) {
    console.error('encodings missing in iconv-lite: ' + [...missing].join(', '));
  }

  // JSON.stringify(value, replacer, space): the indent width is the THIRD
  // argument. The previous call passed (charsets, 4, null), which ignored the
  // 4 and produced unindented output.
  console.log(JSON.stringify(charsets, null, 4));
});

// Called right after query(); this relies on the driver running the queued
// query before closing the connection - TODO confirm for the driver in use.
conn.end();
41+

0 commit comments

Comments
 (0)