Skip to content

Commit 51fe205

Browse files
committed
Allow encodings to be autoloaded through transcoding functions
Make sure VM lock is not held when calling `load_transcoder_entry`, as that causes deadlock inside ractors. `String#encode` now works inside ractors, among others.
1 parent 1c6b36a commit 51fe205

File tree

4 files changed

+133
-79
lines changed

4 files changed

+133
-79
lines changed

encoding.c

Lines changed: 40 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ static struct enc_table {
7373
st_table *names;
7474
} global_enc_table;
7575

76+
static const char *string_UTF_8;
77+
static const char *string_US_ASCII;
78+
static const char *string_ASCII_8BIT;
79+
7680
static int
7781
enc_names_free_i(st_data_t name, st_data_t idx, st_data_t args)
7882
{
@@ -258,6 +262,7 @@ must_encindex(int index)
258262
int
259263
rb_to_encoding_index(VALUE enc)
260264
{
265+
ASSERT_vm_unlocking(); // can load encoding, so must not hold VM lock
261266
int idx;
262267
const char *name;
263268

@@ -668,9 +673,11 @@ rb_enc_alias(const char *alias, const char *orig)
668673
{
669674
int idx, r;
670675

676+
idx = rb_enc_find_index(orig);
677+
671678
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
672679
enc_check_addable(enc_table, alias);
673-
if ((idx = rb_enc_find_index(orig)) < 0) {
680+
if (idx < 0) {
674681
r = -1;
675682
}
676683
else {
@@ -707,7 +714,8 @@ rb_enc_init(struct enc_table *enc_table)
707714
enc_table->names = st_init_strcasetable_with_size(ENCODING_LIST_CAPA);
708715
}
709716
#define OnigEncodingASCII_8BIT OnigEncodingASCII
710-
#define ENC_REGISTER(enc) enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
717+
#define ENC_REGISTER(enc) string_##enc = rb_enc_name(&OnigEncoding##enc); \
718+
enc_register_at(enc_table, ENCINDEX_##enc, string_##enc, &OnigEncoding##enc)
711719
ENC_REGISTER(ASCII_8BIT);
712720
ENC_REGISTER(UTF_8);
713721
ENC_REGISTER(US_ASCII);
@@ -742,6 +750,7 @@ int rb_require_internal_silent(VALUE fname);
742750
static int
743751
load_encoding(const char *name)
744752
{
753+
ASSERT_vm_unlocking();
745754
VALUE enclib = rb_sprintf("enc/%s.so", name);
746755
VALUE debug = ruby_debug;
747756
VALUE errinfo;
@@ -757,7 +766,7 @@ load_encoding(const char *name)
757766
enclib = rb_fstring(enclib);
758767
ruby_debug = Qfalse;
759768
errinfo = rb_errinfo();
760-
loaded = rb_require_internal_silent(enclib);
769+
loaded = rb_require_internal_silent(enclib); // must run without VM_LOCK
761770
ruby_debug = debug;
762771
rb_set_errinfo(errinfo);
763772

@@ -781,6 +790,7 @@ enc_autoload_body(rb_encoding *enc)
781790
{
782791
rb_encoding *base;
783792
int i = 0;
793+
ASSERT_vm_unlocking();
784794

785795
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
786796
base = enc_table->list[ENC_TO_ENCINDEX(enc)].base;
@@ -792,30 +802,32 @@ enc_autoload_body(rb_encoding *enc)
792802
}
793803
} while (enc_table->list[i].enc != base && (++i, 1));
794804
}
805+
}
806+
795807

796-
if (i != -1) {
797-
if (base) {
798-
bool do_register = true;
799-
if (rb_enc_autoload_p(base)) {
800-
if (rb_enc_autoload(base) < 0) {
801-
do_register = false;
802-
i = -1;
803-
}
808+
if (i != -1) {
809+
if (base) {
810+
bool do_register = true;
811+
if (rb_enc_autoload_p(base)) {
812+
if (rb_enc_autoload(base) < 0) {
813+
do_register = false;
814+
i = -1;
804815
}
816+
}
805817

806-
i = enc->ruby_encoding_index;
807-
if (do_register) {
818+
if (do_register) {
819+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
820+
i = enc->ruby_encoding_index;
808821
enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
809822
((rb_raw_encoding *)enc)->ruby_encoding_index = i;
810823
}
811-
812-
i &= ENC_INDEX_MASK;
813824
}
814-
else {
815-
i = -2;
816-
}
817-
}
818825

826+
i &= ENC_INDEX_MASK;
827+
}
828+
else {
829+
i = -2;
830+
}
819831
}
820832

821833
return i;
@@ -824,6 +836,7 @@ enc_autoload_body(rb_encoding *enc)
824836
int
825837
rb_enc_autoload(rb_encoding *enc)
826838
{
839+
ASSERT_vm_unlocking();
827840
int i = enc_autoload_body(enc);
828841
if (i == -2) {
829842
i = load_encoding(rb_enc_name(enc));
@@ -844,6 +857,7 @@ int
844857
rb_enc_find_index(const char *name)
845858
{
846859
int i;
860+
ASSERT_vm_unlocking(); // it needs to be unlocked so it can call `load_encoding` if necessary
847861
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
848862
i = enc_registered(enc_table, name);
849863
}
@@ -1019,7 +1033,6 @@ rb_enc_associate_index(VALUE obj, int idx)
10191033
rb_encoding *enc;
10201034
int oldidx, oldtermlen, termlen;
10211035

1022-
/* enc_check_capable(obj);*/
10231036
rb_check_frozen(obj);
10241037
oldidx = rb_enc_get_index(obj);
10251038
if (oldidx == idx)
@@ -1526,6 +1539,9 @@ int rb_locale_charmap_index(void);
15261539
int
15271540
rb_locale_encindex(void)
15281541
{
1542+
// `rb_locale_charmap_index` can call `enc_find_index`, which can
1543+
// load an encoding. This needs to be done without VM lock held.
1544+
ASSERT_vm_unlocking();
15291545
int idx = rb_locale_charmap_index();
15301546

15311547
if (idx < 0) idx = ENCINDEX_UTF_8;
@@ -1584,6 +1600,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
15841600
/* Already set */
15851601
overridden = TRUE;
15861602

1603+
if (!NIL_P(encoding)) {
1604+
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
1605+
}
1606+
15871607
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
15881608
if (NIL_P(encoding)) {
15891609
def->index = -1;

hash.c

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5192,25 +5192,26 @@ env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
51925192
}
51935193

51945194
static VALUE
5195-
env_str_new(const char *ptr, long len)
5195+
env_str_new(const char *ptr, long len, rb_encoding *enc)
51965196
{
5197-
return env_enc_str_new(ptr, len, env_encoding());
5197+
return env_enc_str_new(ptr, len, enc);
51985198
}
51995199

52005200
static VALUE
5201-
env_str_new2(const char *ptr)
5201+
env_str_new2(const char *ptr, rb_encoding *enc)
52025202
{
52035203
if (!ptr) return Qnil;
5204-
return env_str_new(ptr, strlen(ptr));
5204+
return env_str_new(ptr, strlen(ptr), enc);
52055205
}
52065206

52075207
static VALUE
52085208
getenv_with_lock(const char *name)
52095209
{
52105210
VALUE ret;
5211+
rb_encoding *enc = env_encoding();
52115212
ENV_LOCKING() {
52125213
const char *val = getenv(name);
5213-
ret = env_str_new2(val);
5214+
ret = env_str_new2(val, enc);
52145215
}
52155216
return ret;
52165217
}
@@ -5773,13 +5774,14 @@ env_values(void)
57735774
{
57745775
VALUE ary = rb_ary_new();
57755776

5777+
rb_encoding *enc = env_encoding();
57765778
ENV_LOCKING() {
57775779
char **env = GET_ENVIRON(environ);
57785780

57795781
while (*env) {
57805782
char *s = strchr(*env, '=');
57815783
if (s) {
5782-
rb_ary_push(ary, env_str_new2(s+1));
5784+
rb_ary_push(ary, env_str_new2(s+1, enc));
57835785
}
57845786
env++;
57855787
}
@@ -5865,14 +5867,15 @@ env_each_pair(VALUE ehash)
58655867

58665868
VALUE ary = rb_ary_new();
58675869

5870+
rb_encoding *enc = env_encoding();
58685871
ENV_LOCKING() {
58695872
char **env = GET_ENVIRON(environ);
58705873

58715874
while (*env) {
58725875
char *s = strchr(*env, '=');
58735876
if (s) {
5874-
rb_ary_push(ary, env_str_new(*env, s-*env));
5875-
rb_ary_push(ary, env_str_new2(s+1));
5877+
rb_ary_push(ary, env_str_new(*env, s-*env, enc));
5878+
rb_ary_push(ary, env_str_new2(s+1, enc));
58765879
}
58775880
env++;
58785881
}
@@ -6255,13 +6258,14 @@ env_to_a(VALUE _)
62556258
{
62566259
VALUE ary = rb_ary_new();
62576260

6261+
rb_encoding *enc = env_encoding();
62586262
ENV_LOCKING() {
62596263
char **env = GET_ENVIRON(environ);
62606264
while (*env) {
62616265
char *s = strchr(*env, '=');
62626266
if (s) {
6263-
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env),
6264-
env_str_new2(s+1)));
6267+
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env, enc),
6268+
env_str_new2(s+1, enc)));
62656269
}
62666270
env++;
62676271
}
@@ -6509,14 +6513,15 @@ env_key(VALUE dmy, VALUE value)
65096513
StringValue(value);
65106514
VALUE str = Qnil;
65116515

6516+
rb_encoding *enc = env_encoding();
65126517
ENV_LOCKING() {
65136518
char **env = GET_ENVIRON(environ);
65146519
while (*env) {
65156520
char *s = strchr(*env, '=');
65166521
if (s++) {
65176522
long len = strlen(s);
65186523
if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) {
6519-
str = env_str_new(*env, s-*env-1);
6524+
str = env_str_new(*env, s-*env-1, enc);
65206525
break;
65216526
}
65226527
}
@@ -6533,13 +6538,14 @@ env_to_hash(void)
65336538
{
65346539
VALUE hash = rb_hash_new();
65356540

6541+
rb_encoding *enc = env_encoding();
65366542
ENV_LOCKING() {
65376543
char **env = GET_ENVIRON(environ);
65386544
while (*env) {
65396545
char *s = strchr(*env, '=');
65406546
if (s) {
6541-
rb_hash_aset(hash, env_str_new(*env, s-*env),
6542-
env_str_new2(s+1));
6547+
rb_hash_aset(hash, env_str_new(*env, s-*env, enc),
6548+
env_str_new2(s+1, enc));
65436549
}
65446550
env++;
65456551
}
@@ -6684,14 +6690,15 @@ env_shift(VALUE _)
66846690
VALUE result = Qnil;
66856691
VALUE key = Qnil;
66866692

6693+
rb_encoding *enc = env_encoding();
66876694
ENV_LOCKING() {
66886695
char **env = GET_ENVIRON(environ);
66896696
if (*env) {
66906697
const char *p = *env;
66916698
char *s = strchr(p, '=');
66926699
if (s) {
6693-
key = env_str_new(p, s-p);
6694-
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)));
6700+
key = env_str_new(p, s-p, enc);
6701+
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)), enc);
66956702
result = rb_assoc_new(key, val);
66966703
}
66976704
}

test/ruby/test_transcode.rb

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,6 +2320,46 @@ def test_newline_options
23202320
assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
23212321
end
23222322

2323+
def test_ractor_lazy_load_encoding
2324+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2325+
begin;
2326+
rs = []
2327+
autoload_encodings = Encoding.list.select { |e| e.inspect.include?("(autoload)") }.freeze
2328+
7.times do
2329+
rs << Ractor.new(autoload_encodings) do |encodings|
2330+
str = "\u0300"
2331+
encodings.each do |enc|
2332+
str.encode(enc) rescue Encoding::UndefinedConversionError
2333+
end
2334+
end
2335+
end
2336+
2337+
while rs.any?
2338+
r, _obj = Ractor.select(*rs)
2339+
rs.delete(r)
2340+
end
2341+
assert rs.empty?
2342+
end;
2343+
end
2344+
2345+
def test_ractor_lazy_load_encoding_random
2346+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2347+
begin;
2348+
rs = []
2349+
100.times do
2350+
rs << Ractor.new do
2351+
"\u0300".encode(Encoding.list.sample) rescue Encoding::UndefinedConversionError
2352+
end
2353+
end
2354+
2355+
while rs.any?
2356+
r, _obj = Ractor.select(*rs)
2357+
rs.delete(r)
2358+
end
2359+
assert rs.empty?
2360+
end;
2361+
end
2362+
23232363
private
23242364

23252365
def assert_conversion_both_ways_utf8(utf8, raw, encoding)

0 commit comments

Comments
 (0)