Skip to content

Commit e68adac

Browse files
committed
merge revision(s) 097d742: [Backport #20009]
[Bug #20009] Support marshaling non-ASCII name class/module
1 parent 82e05dc commit e68adac

File tree

4 files changed

+94
-36
lines changed

4 files changed

+94
-36
lines changed

marshal.c

Lines changed: 64 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,31 @@ w_float(double d, struct dump_arg *arg)
457457
}
458458
}
459459

460+
461+
static VALUE
462+
w_encivar(VALUE str, struct dump_arg *arg)
463+
{
464+
VALUE encname = encoding_name(str, arg);
465+
if (NIL_P(encname) ||
466+
is_ascii_string(str)) {
467+
return Qnil;
468+
}
469+
w_byte(TYPE_IVAR, arg);
470+
return encname;
471+
}
472+
473+
static void
474+
w_encname(VALUE encname, struct dump_arg *arg)
475+
{
476+
if (!NIL_P(encname)) {
477+
struct dump_call_arg c_arg;
478+
c_arg.limit = 1;
479+
c_arg.arg = arg;
480+
w_long(1L, arg);
481+
w_encoding(encname, &c_arg);
482+
}
483+
}
484+
460485
static void
461486
w_symbol(VALUE sym, struct dump_arg *arg)
462487
{
@@ -473,24 +498,11 @@ w_symbol(VALUE sym, struct dump_arg *arg)
473498
if (!sym) {
474499
rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
475500
}
476-
encname = encoding_name(sym, arg);
477-
if (NIL_P(encname) ||
478-
is_ascii_string(sym)) {
479-
encname = Qnil;
480-
}
481-
else {
482-
w_byte(TYPE_IVAR, arg);
483-
}
501+
encname = w_encivar(sym, arg);
484502
w_byte(TYPE_SYMBOL, arg);
485503
w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
486504
st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
487-
if (!NIL_P(encname)) {
488-
struct dump_call_arg c_arg;
489-
c_arg.limit = 1;
490-
c_arg.arg = arg;
491-
w_long(1L, arg);
492-
w_encoding(encname, &c_arg);
493-
}
505+
w_encname(encname, arg);
494506
}
495507
}
496508

@@ -947,19 +959,23 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
947959
if (FL_TEST(obj, FL_SINGLETON)) {
948960
rb_raise(rb_eTypeError, "singleton class can't be dumped");
949961
}
950-
w_byte(TYPE_CLASS, arg);
951962
{
952963
VALUE path = class2path(obj);
964+
VALUE encname = w_encivar(path, arg);
965+
w_byte(TYPE_CLASS, arg);
953966
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
967+
w_encname(encname, arg);
954968
RB_GC_GUARD(path);
955969
}
956970
break;
957971

958972
case T_MODULE:
959-
w_byte(TYPE_MODULE, arg);
960973
{
961974
VALUE path = class2path(obj);
975+
VALUE encname = w_encivar(path, arg);
976+
w_byte(TYPE_MODULE, arg);
962977
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
978+
w_encname(encname, arg);
963979
RB_GC_GUARD(path);
964980
}
965981
break;
@@ -1701,6 +1717,34 @@ r_copy_ivar(VALUE v, VALUE data)
17011717
"can't override instance variable of "type" '%"PRIsVALUE"'", \
17021718
(str))
17031719

1720+
static int
1721+
r_ivar_encoding(VALUE obj, struct load_arg *arg, VALUE sym, VALUE val)
1722+
{
1723+
int idx = sym2encidx(sym, val);
1724+
if (idx >= 0) {
1725+
if (rb_enc_capable(obj)) {
1726+
rb_enc_associate_index(obj, idx);
1727+
}
1728+
else {
1729+
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1730+
}
1731+
return TRUE;
1732+
}
1733+
return FALSE;
1734+
}
1735+
1736+
static long
1737+
r_encname(VALUE obj, struct load_arg *arg)
1738+
{
1739+
long len = r_long(arg);
1740+
if (len > 0) {
1741+
VALUE sym = r_symbol(arg);
1742+
VALUE val = r_object(arg);
1743+
len -= r_ivar_encoding(obj, arg, sym, val);
1744+
}
1745+
return len;
1746+
}
1747+
17041748
static void
17051749
r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
17061750
{
@@ -1717,14 +1761,7 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
17171761
do {
17181762
VALUE sym = r_symbol(arg);
17191763
VALUE val = r_object(arg);
1720-
int idx = sym2encidx(sym, val);
1721-
if (idx >= 0) {
1722-
if (rb_enc_capable(obj)) {
1723-
rb_enc_associate_index(obj, idx);
1724-
}
1725-
else {
1726-
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
1727-
}
1764+
if (r_ivar_encoding(obj, arg, sym, val)) {
17281765
if (has_encoding) *has_encoding = TRUE;
17291766
}
17301767
else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
@@ -2248,6 +2285,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
22482285
{
22492286
VALUE str = r_bytes(arg);
22502287

2288+
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
22512289
v = path2class(str);
22522290
prohibit_ivar("class", str);
22532291
v = r_entry(v, arg);
@@ -2259,6 +2297,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
22592297
{
22602298
VALUE str = r_bytes(arg);
22612299

2300+
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
22622301
v = path2module(str);
22632302
prohibit_ivar("module", str);
22642303
v = r_entry(v, arg);

spec/ruby/core/marshal/dump_spec.rb

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,12 @@ def _dump(level)
193193
Marshal.dump(MarshalSpec::ClassWithOverriddenName).should == "\x04\bc)MarshalSpec::ClassWithOverriddenName"
194194
end
195195

196-
it "dumps a class with multibyte characters in name" do
197-
source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
198-
Marshal.dump(source_object).should == "\x04\bc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class"
196+
ruby_version_is "3.5" do
  it "dumps a class with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class\x06:\x06ET"
    # Assert the round trip: a bare `==` would compute a value and discard it.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
200203

201204
it "raises TypeError with an anonymous Class" do
@@ -216,9 +219,12 @@ def _dump(level)
216219
Marshal.dump(MarshalSpec::ModuleWithOverriddenName).should == "\x04\bc*MarshalSpec::ModuleWithOverriddenName"
217220
end
218221

219-
it "dumps a module with multibyte characters in name" do
220-
source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
221-
Marshal.dump(source_object).should == "\x04\bm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module"
222+
ruby_version_is "3.5" do
  it "dumps a module with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module\x06:\x06ET"
    # Assert the round trip: a bare `==` would compute a value and discard it.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
223229

224230
it "raises TypeError with an anonymous Module" do
@@ -706,9 +712,12 @@ def finalizer.noop(_)
706712
Marshal.dump(obj).should include("MarshalSpec::TimeWithOverriddenName")
707713
end
708714

709-
it "dumps a Time subclass with multibyte characters in name" do
710-
source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
711-
Marshal.dump(source_object).should == "\x04\bc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time"
715+
ruby_version_is "3.5" do
  it "dumps a Time subclass with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time\x06:\x06ET"
    # Assert the round trip: a bare `==` would compute a value and discard it.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
713722

714723
it "raises TypeError with an anonymous Time subclass" do

test/ruby/test_marshal.rb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,11 @@ def test_symlink_in_ivar
268268
classISO8859_1.name
269269
ClassISO8859_1 = classISO8859_1
270270

271-
def test_class_nonascii
271+
moduleUTF8 = const_set("C\u{30af 30e9 30b9}", Module.new)
272+
moduleUTF8.name
273+
ModuleUTF8 = moduleUTF8
274+
275+
def test_nonascii_class_instance
272276
a = ClassUTF8.new
273277
assert_instance_of(ClassUTF8, Marshal.load(Marshal.dump(a)), '[ruby-core:24790]')
274278

@@ -301,6 +305,12 @@ def test_class_nonascii
301305
end
302306
end
303307

308+
# Dumping and loading a class or module object with a non-ASCII name
# must give back the very same object.
def test_nonascii_class_module
  [ClassUTF8, ClassISO8859_1, ModuleUTF8].each do |mod|
    assert_same(mod, Marshal.load(Marshal.dump(mod)))
  end
end
313+
304314
def test_regexp2
305315
assert_equal(/\\u/, Marshal.load("\004\b/\b\\\\u\000"))
306316
assert_equal(/u/, Marshal.load("\004\b/\a\\u\000"))

version.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
# define RUBY_VERSION_MINOR RUBY_API_VERSION_MINOR
1212
#define RUBY_VERSION_TEENY 4
1313
#define RUBY_RELEASE_DATE RUBY_RELEASE_YEAR_STR"-"RUBY_RELEASE_MONTH_STR"-"RUBY_RELEASE_DAY_STR
14-
#define RUBY_PATCHLEVEL 43
14+
#define RUBY_PATCHLEVEL 44
1515

1616
#include "ruby/version.h"
1717
#include "ruby/internal/abi.h"

0 commit comments

Comments
 (0)