Skip to content

Commit 097d742

Browse files
committed
[Bug #20009] Support marshaling non-ASCII name class/module
1 parent 5e01c0e commit 097d742

File tree

3 files changed

+93
-35
lines changed

3 files changed

+93
-35
lines changed

marshal.c

Lines changed: 64 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,31 @@ w_float(double d, struct dump_arg *arg)
460460
}
461461
}
462462

463+
464+
/*
 * Decide whether `str` must be dumped with an encoding instance variable
 * and, if so, emit the TYPE_IVAR prefix byte.
 *
 * Returns the value obtained from encoding_name() when the prefix was
 * written, or Qnil when nothing was written (no encoding name, or
 * is_ascii_string() reports the string as ASCII).  The caller is expected
 * to hand the result to w_encname() after writing the payload.
 */
static VALUE
465+
w_encivar(VALUE str, struct dump_arg *arg)
466+
{
467+
VALUE encname = encoding_name(str, arg);
468+
if (NIL_P(encname) ||
469+
is_ascii_string(str)) {
470+
/* No encoding ivar needed: leave the stream untouched. */
return Qnil;
471+
}
472+
w_byte(TYPE_IVAR, arg);
473+
return encname;
474+
}
475+
476+
/*
 * Write the trailing instance-variable section that carries the encoding,
 * the counterpart of the TYPE_IVAR prefix emitted by w_encivar().
 *
 * When `encname` is nil nothing is written.  Otherwise a count of 1 is
 * written with w_long(), followed by the encoding entry via w_encoding().
 */
static void
477+
w_encname(VALUE encname, struct dump_arg *arg)
478+
{
479+
if (!NIL_P(encname)) {
480+
struct dump_call_arg c_arg;
481+
c_arg.limit = 1;
482+
c_arg.arg = arg;
483+
/* Exactly one ivar follows: the encoding. */
w_long(1L, arg);
484+
w_encoding(encname, &c_arg);
485+
}
486+
}
487+
463488
static void
464489
w_symbol(VALUE sym, struct dump_arg *arg)
465490
{
@@ -476,24 +501,11 @@ w_symbol(VALUE sym, struct dump_arg *arg)
476501
if (!sym) {
477502
rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
478503
}
479-
encname = encoding_name(sym, arg);
480-
if (NIL_P(encname) ||
481-
is_ascii_string(sym)) {
482-
encname = Qnil;
483-
}
484-
else {
485-
w_byte(TYPE_IVAR, arg);
486-
}
504+
encname = w_encivar(sym, arg);
487505
w_byte(TYPE_SYMBOL, arg);
488506
w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
489507
st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
490-
if (!NIL_P(encname)) {
491-
struct dump_call_arg c_arg;
492-
c_arg.limit = 1;
493-
c_arg.arg = arg;
494-
w_long(1L, arg);
495-
w_encoding(encname, &c_arg);
496-
}
508+
w_encname(encname, arg);
497509
}
498510
}
499511

@@ -953,19 +965,23 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
953965
if (FL_TEST(obj, FL_SINGLETON)) {
954966
rb_raise(rb_eTypeError, "singleton class can't be dumped");
955967
}
956-
w_byte(TYPE_CLASS, arg);
957968
{
958969
VALUE path = class2path(obj);
970+
VALUE encname = w_encivar(path, arg);
971+
w_byte(TYPE_CLASS, arg);
959972
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
973+
w_encname(encname, arg);
960974
RB_GC_GUARD(path);
961975
}
962976
break;
963977

964978
case T_MODULE:
965-
w_byte(TYPE_MODULE, arg);
966979
{
967980
VALUE path = class2path(obj);
981+
VALUE encname = w_encivar(path, arg);
982+
w_byte(TYPE_MODULE, arg);
968983
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
984+
w_encname(encname, arg);
969985
RB_GC_GUARD(path);
970986
}
971987
break;
@@ -1707,6 +1723,34 @@ r_copy_ivar(VALUE v, VALUE data)
17071723
"can't override instance variable of "type" '%"PRIsVALUE"'", \
17081724
(str))
17091725

1726+
/*
 * Apply an encoding instance variable to `obj` if the (sym, val) pair
 * denotes one.
 *
 * sym2encidx() maps the pair to an encoding index; a negative index means
 * the pair is not an encoding and FALSE is returned without touching `obj`.
 * Otherwise the encoding is associated with `obj` and TRUE is returned.
 * Raises ArgumentError when `obj` is not encoding-capable.
 *
 * NOTE(review): `arg` is not used in this body.
 */
static int
1727+
r_ivar_encoding(VALUE obj, struct load_arg *arg, VALUE sym, VALUE val)
1728+
{
1729+
int idx = sym2encidx(sym, val);
1730+
if (idx >= 0) {
1731+
if (rb_enc_capable(obj)) {
1732+
rb_enc_associate_index(obj, idx);
1733+
}
1734+
else {
1735+
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1736+
}
1737+
return TRUE;
1738+
}
1739+
return FALSE;
1740+
}
1741+
1742+
/*
 * Read the instance-variable section that follows a dumped name and apply
 * a leading encoding entry to `obj`.
 *
 * Reads the ivar count with r_long(); if it is positive, reads one
 * symbol/value pair and passes it to r_ivar_encoding().  Returns the count
 * minus one when that pair was an encoding, otherwise the count unchanged.
 *
 * NOTE(review): only the first pair is consumed here regardless of the
 * count; a positive return value means entries (or a non-encoding pair
 * already read) remain unaccounted for by this function.
 */
static long
1743+
r_encname(VALUE obj, struct load_arg *arg)
1744+
{
1745+
long len = r_long(arg);
1746+
if (len > 0) {
1747+
VALUE sym = r_symbol(arg);
1748+
VALUE val = r_object(arg);
1749+
/* r_ivar_encoding() yields TRUE (1) when the pair was an encoding. */
len -= r_ivar_encoding(obj, arg, sym, val);
1750+
}
1751+
return len;
1752+
}
1753+
17101754
static void
17111755
r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
17121756
{
@@ -1723,14 +1767,7 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
17231767
do {
17241768
VALUE sym = r_symbol(arg);
17251769
VALUE val = r_object(arg);
1726-
int idx = sym2encidx(sym, val);
1727-
if (idx >= 0) {
1728-
if (rb_enc_capable(obj)) {
1729-
rb_enc_associate_index(obj, idx);
1730-
}
1731-
else {
1732-
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1733-
}
1770+
if (r_ivar_encoding(obj, arg, sym, val)) {
17341771
if (has_encoding) *has_encoding = TRUE;
17351772
}
17361773
else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
@@ -2254,6 +2291,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
22542291
{
22552292
VALUE str = r_bytes(arg);
22562293

2294+
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
22572295
v = path2class(str);
22582296
prohibit_ivar("class", str);
22592297
v = r_entry(v, arg);
@@ -2265,6 +2303,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
22652303
{
22662304
VALUE str = r_bytes(arg);
22672305

2306+
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
22682307
v = path2module(str);
22692308
prohibit_ivar("module", str);
22702309
v = r_entry(v, arg);

spec/ruby/core/marshal/dump_spec.rb

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,12 @@ def _dump(level)
231231
Marshal.dump(MarshalSpec::ClassWithOverriddenName).should == "\x04\bc)MarshalSpec::ClassWithOverriddenName"
232232
end
233233

234-
it "dumps a class with multibyte characters in name" do
235-
source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
236-
Marshal.dump(source_object).should == "\x04\bc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class"
234+
ruby_version_is "3.5" do
235+
it "dumps a class with multibyte characters in name" do
236+
source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
237+
Marshal.dump(source_object).should == "\x04\bIc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class\x06:\x06ET"
238+
Marshal.load(Marshal.dump(source_object)) == source_object
239+
end
237240
end
238241

239242
it "uses object links for objects repeatedly dumped" do
@@ -258,9 +261,12 @@ def _dump(level)
258261
Marshal.dump(MarshalSpec::ModuleWithOverriddenName).should == "\x04\bc*MarshalSpec::ModuleWithOverriddenName"
259262
end
260263

261-
it "dumps a module with multibyte characters in name" do
262-
source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
263-
Marshal.dump(source_object).should == "\x04\bm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module"
264+
ruby_version_is "3.5" do
265+
it "dumps a module with multibyte characters in name" do
266+
source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
267+
Marshal.dump(source_object).should == "\x04\bIm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module\x06:\x06ET"
268+
Marshal.load(Marshal.dump(source_object)) == source_object
269+
end
264270
end
265271

266272
it "uses object links for objects repeatedly dumped" do
@@ -874,9 +880,12 @@ def finalizer.noop(_)
874880
Marshal.dump(obj).should include("MarshalSpec::TimeWithOverriddenName")
875881
end
876882

877-
it "dumps a Time subclass with multibyte characters in name" do
878-
source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
879-
Marshal.dump(source_object).should == "\x04\bc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time"
883+
ruby_version_is "3.5" do
884+
it "dumps a Time subclass with multibyte characters in name" do
885+
source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
886+
Marshal.dump(source_object).should == "\x04\bIc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time\x06:\x06ET"
887+
Marshal.load(Marshal.dump(source_object)) == source_object
888+
end
880889
end
881890

882891
it "uses object links for objects repeatedly dumped" do

test/ruby/test_marshal.rb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,11 @@ def test_symlink_in_ivar
268268
classISO8859_1.name
269269
ClassISO8859_1 = classISO8859_1
270270

271-
def test_class_nonascii
271+
moduleUTF8 = const_set("C\u{30af 30e9 30b9}", Module.new)
272+
moduleUTF8.name
273+
ModuleUTF8 = moduleUTF8
274+
275+
def test_nonascii_class_instance
272276
a = ClassUTF8.new
273277
assert_instance_of(ClassUTF8, Marshal.load(Marshal.dump(a)), '[ruby-core:24790]')
274278

@@ -301,6 +305,12 @@ def test_class_nonascii
301305
end
302306
end
303307

308+
# Dumping and loading a class or module whose name is non-ASCII must
# return the very same object (checked with assert_same), for the
# ClassUTF8, ClassISO8859_1 and ModuleUTF8 constants.
def test_nonascii_class_module
309+
assert_same(ClassUTF8, Marshal.load(Marshal.dump(ClassUTF8)))
310+
assert_same(ClassISO8859_1, Marshal.load(Marshal.dump(ClassISO8859_1)))
311+
assert_same(ModuleUTF8, Marshal.load(Marshal.dump(ModuleUTF8)))
312+
end
313+
304314
def test_regexp2
305315
assert_equal(/\\u/, Marshal.load("\004\b/\b\\\\u\000"))
306316
assert_equal(/u/, Marshal.load("\004\b/\a\\u\000"))

0 commit comments

Comments
 (0)