Skip to content

Commit 271ea5e

Browse files
committed
Ensure error messages are always valid UTF-8
Given all of the scenarios for a MySQL error message's encoding, the end result is either a) a potentially corrupt string or b) a valid UTF-8 string. In any case, we'll want to end up with a UTF-8 string eventually.
1 parent 3751d8b commit 271ea5e

File tree

3 files changed

+117
-85
lines changed

3 files changed

+117
-85
lines changed

ext/mysql2/client.c

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
VALUE cMysql2Client;
1313
extern VALUE mMysql2, cMysql2Error;
1414
static VALUE sym_id, sym_version, sym_async, sym_symbolize_keys, sym_as, sym_array, sym_stream;
15-
static ID intern_merge, intern_merge_bang, intern_error_number_eql, intern_sql_state_eql;
15+
static ID intern_merge, intern_merge_bang, intern_error_number_eql, intern_sql_state_eql, intern_server_version;
1616

1717
#ifndef HAVE_RB_HASH_DUP
1818
static VALUE rb_hash_dup(VALUE other) {
@@ -125,26 +125,13 @@ static VALUE rb_raise_mysql2_error(mysql_client_wrapper *wrapper) {
125125
VALUE rb_error_msg = rb_str_new2(mysql_error(wrapper->client));
126126
VALUE rb_sql_state = rb_tainted_str_new2(mysql_sqlstate(wrapper->client));
127127
VALUE e;
128+
128129
#ifdef HAVE_RUBY_ENCODING_H
129-
if (wrapper->server_version < 50500) {
130-
/* MySQL < 5.5 uses mixed encoding, just call it binary. */
131-
int err_enc = rb_ascii8bit_encindex();
132-
rb_enc_associate_index(rb_error_msg, err_enc);
133-
rb_enc_associate_index(rb_sql_state, err_enc);
134-
} else {
135-
/* MySQL >= 5.5 uses UTF-8 errors internally and converts them to the connection encoding. */
136-
rb_encoding *default_internal_enc = rb_default_internal_encoding();
137-
rb_encoding *conn_enc = rb_to_encoding(wrapper->encoding);
138-
rb_enc_associate(rb_error_msg, conn_enc);
139-
rb_enc_associate(rb_sql_state, conn_enc);
140-
if (default_internal_enc) {
141-
rb_error_msg = rb_str_export_to_enc(rb_error_msg, default_internal_enc);
142-
rb_sql_state = rb_str_export_to_enc(rb_sql_state, default_internal_enc);
143-
}
144-
}
130+
rb_enc_associate(rb_error_msg, rb_utf8_encoding());
131+
rb_enc_associate(rb_sql_state, rb_usascii_encoding());
145132
#endif
146133

147-
e = rb_exc_new3(cMysql2Error, rb_error_msg);
134+
e = rb_funcall(cMysql2Error, rb_intern("new"), 2, rb_error_msg, LONG2FIX(wrapper->server_version));
148135
rb_funcall(e, intern_error_number_eql, 1, UINT2NUM(mysql_errno(wrapper->client)));
149136
rb_funcall(e, intern_sql_state_eql, 1, rb_sql_state);
150137
rb_exc_raise(e);
@@ -1221,6 +1208,7 @@ void init_mysql2_client() {
12211208
intern_merge_bang = rb_intern("merge!");
12221209
intern_error_number_eql = rb_intern("error_number=");
12231210
intern_sql_state_eql = rb_intern("sql_state=");
1211+
intern_server_version = rb_intern("server_version=");
12241212

12251213
#ifdef CLIENT_LONG_PASSWORD
12261214
rb_const_set(cMysql2Client, rb_intern("LONG_PASSWORD"),

lib/mysql2/error.rb

Lines changed: 65 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,74 @@
1+
# encoding: UTF-8
2+
13
module Mysql2
24
class Error < StandardError
3-
attr_accessor :error_number, :sql_state
5+
REPLACEMENT_CHAR = '?'
46

5-
def initialize msg
6-
super
7-
@error_number = nil
8-
@sql_state = nil
9-
end
7+
attr_accessor :error_number, :sql_state
8+
attr_writer :server_version
109

1110
# Mysql gem compatibility
1211
alias_method :errno, :error_number
1312
alias_method :error, :message
13+
14+
def initialize(msg, server_version=nil)
15+
self.server_version = server_version
16+
17+
super(clean_message(msg))
18+
end
19+
20+
private
21+
22+
# In MySQL 5.5+ error messages are always constructed server-side as UTF-8
23+
# then returned in the encoding set by the `character_set_results` system
24+
# variable.
25+
#
26+
# See http://dev.mysql.com/doc/refman/5.5/en/charset-errors.html for
27+
# more contetx.
28+
#
29+
# Before MySQL 5.5 error message template strings are in whatever encoding
30+
# is associated with the error message language.
31+
# See http://dev.mysql.com/doc/refman/5.1/en/error-message-language.html
32+
# for more information.
33+
#
34+
# The issue is that the user-data inserted in the message could potentially
35+
# be in any encoding MySQL supports and is inserted into the latin1, euckr or
36+
# koi8r string raw. Meaning there's a high probability the string will be
37+
# corrupt encoding-wise.
38+
#
39+
# See http://dev.mysql.com/doc/refman/5.1/en/charset-errors.html for
40+
# more information.
41+
#
42+
# So in an attempt to make sure the error message string is always in a valid
43+
# encoding, we'll assume UTF-8 and clean the string of anything that's not a
44+
# valid UTF-8 character.
45+
#
46+
# Except for if we're on 1.8, where we'll do nothing ;)
47+
#
48+
# Returns a valid UTF-8 string in Ruby 1.9+, the original string on Ruby 1.8
49+
def clean_message(message)
50+
return message if !message.respond_to?(:encoding)
51+
52+
if @server_version && @server_version > 50500
53+
message
54+
else
55+
if message.respond_to? :scrub
56+
message.scrub
57+
else
58+
# This is ugly as hell but Ruby 1.9 doesn't provide a way to clean a string
59+
# and retain its valid UTF-8 characters, that I know of.
60+
61+
new_message = "".force_encoding(Encoding::UTF_8)
62+
message.chars.each do |char|
63+
if char.valid_encoding?
64+
new_message << char
65+
else
66+
new_message << REPLACEMENT_CHAR
67+
end
68+
end
69+
new_message
70+
end
71+
end
72+
end
1473
end
1574
end

spec/mysql2/error_spec.rb

Lines changed: 46 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,71 @@
11
# encoding: UTF-8
2-
require 'spec_helper'
3-
4-
# The matrix of error encoding tests:
5-
# ('Enc = X' means 'Encoding.default_internal = X')
6-
# MySQL < 5.5 MySQL >= 5.5
7-
# Ruby 1.8 N/A N/A
8-
# Ruby 1.9+
9-
# Enc = nil
10-
# :enc = nil BINARY UTF-8
11-
#
12-
# Enc = XYZ
13-
# :enc = XYZ BINARY XYZ
14-
#
15-
# Enc = FOO
16-
# :enc = BAR BINARY FOO
17-
#
182

3+
require 'spec_helper'
194

205
describe Mysql2::Error do
21-
shared_examples "mysql2 error" do
6+
let(:client) { Mysql2::Client.new(DatabaseCredentials['root']) }
7+
8+
let :error do
229
begin
23-
err_client = Mysql2::Client.new(DatabaseCredentials['root'])
24-
err_client.query("HAHAHA")
10+
client.query("HAHAHA")
2511
rescue Mysql2::Error => e
2612
error = e
2713
ensure
28-
err_client.close
14+
client.close
2915
end
3016

31-
subject { error }
32-
it { should respond_to(:error_number) }
33-
it { should respond_to(:sql_state) }
17+
error
18+
end
19+
20+
it "responds to error_number and sql_state, with aliases" do
21+
error.should respond_to(:error_number)
22+
error.should respond_to(:sql_state)
3423

3524
# Mysql gem compatibility
36-
it { should respond_to(:errno) }
37-
it { should respond_to(:error) }
25+
error.should respond_to(:errno)
26+
error.should respond_to(:error)
3827
end
3928

40-
shared_examples "mysql2 error encoding" do |db_enc, def_enc, err_enc|
41-
Encoding.default_internal = def_enc
29+
if "".respond_to? :encoding
30+
let :error do
31+
client = Mysql2::Client.new(DatabaseCredentials['root'])
32+
begin
33+
client.query("造字")
34+
rescue Mysql2::Error => e
35+
error = e
36+
ensure
37+
client.close
38+
end
4239

43-
begin
44-
err_client = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => db_enc))
45-
err_client.query("造字")
46-
rescue Mysql2::Error => e
47-
error = e
48-
ensure
49-
err_client.close
40+
error
5041
end
5142

52-
subject { error.message.encoding }
53-
it "#message should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
43+
let :bad_err do
44+
client = Mysql2::Client.new(DatabaseCredentials['root'])
45+
begin
46+
client.query("\xE5\xC6\x7D\x1F")
47+
rescue Mysql2::Error => e
48+
error = e
49+
ensure
50+
client.close
51+
end
5452

55-
subject { error.error.encoding }
56-
it "#error should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
57-
58-
subject { error.sql_state.encoding }
59-
it "#sql_state should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
60-
end
53+
error
54+
end
6155

62-
shared_examples "mysql2 error encoding (MySQL < 5.5)" do |db_enc, def_enc, err_enc|
63-
include_examples "mysql2 error encoding", db_enc, def_enc, err_enc
64-
end
56+
it "returns error messages as UTF-8" do
57+
error.message.encoding.should eql(Encoding::UTF_8)
58+
error.message.valid_encoding?
6559

66-
shared_examples "mysql2 error encoding (MySQL >= 5.5)" do |db_enc, def_enc, err_enc|
67-
include_examples "mysql2 error encoding", db_enc, def_enc, err_enc
68-
end
60+
bad_err.message.encoding.should eql(Encoding::UTF_8)
61+
bad_err.message.valid_encoding?
6962

70-
it_behaves_like "mysql2 error"
63+
bad_err.message.should include("??}\u001F")
64+
end
7165

72-
unless RUBY_VERSION =~ /1.8/
73-
mysql_ver = Mysql2::Client.new(DatabaseCredentials['root']).server_info[:id]
74-
if mysql_ver < 50505
75-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", nil, nil, Encoding::ASCII_8BIT
76-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'utf8', Encoding::UTF_8, Encoding::ASCII_8BIT
77-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'big5', Encoding::Big5, Encoding::ASCII_8BIT
78-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'big5', Encoding::US_ASCII, Encoding::ASCII_8BIT
79-
else
80-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", nil, nil, Encoding::UTF_8
81-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'utf8', Encoding::UTF_8, Encoding::UTF_8
82-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'big5', Encoding::Big5, Encoding::Big5
83-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'big5', Encoding::US_ASCII, Encoding::US_ASCII
66+
it "returns sql state as ASCII" do
67+
error.sql_state.encoding.should eql(Encoding::US_ASCII)
68+
error.sql_state.valid_encoding?
8469
end
8570
end
8671
end

0 commit comments

Comments
 (0)