Skip to content

Commit 1c9856c

Browse files
committed
Allow encodings to be autoloaded through transcoding functions
Make sure VM lock is not held when calling `load_transcoder_entry`, as that causes deadlock inside ractors. `String#encode` now works inside ractors, among others.
1 parent 1c6b36a commit 1c9856c

File tree

4 files changed

+131
-84
lines changed

4 files changed

+131
-84
lines changed

encoding.c

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ must_encindex(int index)
258258
int
259259
rb_to_encoding_index(VALUE enc)
260260
{
261+
ASSERT_vm_unlocking(); // can load encoding, so must not hold VM lock
261262
int idx;
262263
const char *name;
263264

@@ -667,15 +668,15 @@ int
667668
rb_enc_alias(const char *alias, const char *orig)
668669
{
669670
int idx, r;
671+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
672+
enc_check_addable(enc_table, alias); // can raise
673+
}
674+
675+
idx = rb_enc_find_index(orig);
676+
if (idx < 0) return -1;
670677

671678
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
672-
enc_check_addable(enc_table, alias);
673-
if ((idx = rb_enc_find_index(orig)) < 0) {
674-
r = -1;
675-
}
676-
else {
677-
r = enc_alias(enc_table, alias, idx);
678-
}
679+
r = enc_alias(enc_table, alias, idx);
679680
}
680681

681682
return r;
@@ -742,6 +743,7 @@ int rb_require_internal_silent(VALUE fname);
742743
static int
743744
load_encoding(const char *name)
744745
{
746+
ASSERT_vm_unlocking();
745747
VALUE enclib = rb_sprintf("enc/%s.so", name);
746748
VALUE debug = ruby_debug;
747749
VALUE errinfo;
@@ -757,7 +759,7 @@ load_encoding(const char *name)
757759
enclib = rb_fstring(enclib);
758760
ruby_debug = Qfalse;
759761
errinfo = rb_errinfo();
760-
loaded = rb_require_internal_silent(enclib);
762+
loaded = rb_require_internal_silent(enclib); // must run without VM_LOCK
761763
ruby_debug = debug;
762764
rb_set_errinfo(errinfo);
763765

@@ -781,6 +783,7 @@ enc_autoload_body(rb_encoding *enc)
781783
{
782784
rb_encoding *base;
783785
int i = 0;
786+
ASSERT_vm_unlocking();
784787

785788
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
786789
base = enc_table->list[ENC_TO_ENCINDEX(enc)].base;
@@ -792,30 +795,32 @@ enc_autoload_body(rb_encoding *enc)
792795
}
793796
} while (enc_table->list[i].enc != base && (++i, 1));
794797
}
798+
}
799+
795800

796-
if (i != -1) {
797-
if (base) {
798-
bool do_register = true;
799-
if (rb_enc_autoload_p(base)) {
800-
if (rb_enc_autoload(base) < 0) {
801-
do_register = false;
802-
i = -1;
803-
}
801+
if (i != -1) {
802+
if (base) {
803+
bool do_register = true;
804+
if (rb_enc_autoload_p(base)) {
805+
if (rb_enc_autoload(base) < 0) {
806+
do_register = false;
807+
i = -1;
804808
}
809+
}
805810

806-
i = enc->ruby_encoding_index;
807-
if (do_register) {
811+
if (do_register) {
812+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
813+
i = enc->ruby_encoding_index;
808814
enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
809815
((rb_raw_encoding *)enc)->ruby_encoding_index = i;
810816
}
811-
812-
i &= ENC_INDEX_MASK;
813817
}
814-
else {
815-
i = -2;
816-
}
817-
}
818818

819+
i &= ENC_INDEX_MASK;
820+
}
821+
else {
822+
i = -2;
823+
}
819824
}
820825

821826
return i;
@@ -824,6 +829,7 @@ enc_autoload_body(rb_encoding *enc)
824829
int
825830
rb_enc_autoload(rb_encoding *enc)
826831
{
832+
ASSERT_vm_unlocking();
827833
int i = enc_autoload_body(enc);
828834
if (i == -2) {
829835
i = load_encoding(rb_enc_name(enc));
@@ -844,6 +850,7 @@ int
844850
rb_enc_find_index(const char *name)
845851
{
846852
int i;
853+
ASSERT_vm_unlocking(); // it needs to be unlocked so it can call `load_encoding` if necessary
847854
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
848855
i = enc_registered(enc_table, name);
849856
}
@@ -1019,7 +1026,6 @@ rb_enc_associate_index(VALUE obj, int idx)
10191026
rb_encoding *enc;
10201027
int oldidx, oldtermlen, termlen;
10211028

1022-
/* enc_check_capable(obj);*/
10231029
rb_check_frozen(obj);
10241030
oldidx = rb_enc_get_index(obj);
10251031
if (oldidx == idx)
@@ -1526,6 +1532,9 @@ int rb_locale_charmap_index(void);
15261532
int
15271533
rb_locale_encindex(void)
15281534
{
1535+
// `rb_locale_charmap_index` can call `enc_find_index`, which can
1536+
// load an encoding. This needs to be done without VM lock held.
1537+
ASSERT_vm_unlocking();
15291538
int idx = rb_locale_charmap_index();
15301539

15311540
if (idx < 0) idx = ENCINDEX_UTF_8;
@@ -1584,6 +1593,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
15841593
/* Already set */
15851594
overridden = TRUE;
15861595

1596+
if (!NIL_P(encoding)) {
1597+
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
1598+
}
1599+
15871600
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
15881601
if (NIL_P(encoding)) {
15891602
def->index = -1;

hash.c

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5192,25 +5192,26 @@ env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
51925192
}
51935193

51945194
static VALUE
5195-
env_str_new(const char *ptr, long len)
5195+
env_str_new(const char *ptr, long len, rb_encoding *enc)
51965196
{
5197-
return env_enc_str_new(ptr, len, env_encoding());
5197+
return env_enc_str_new(ptr, len, enc);
51985198
}
51995199

52005200
static VALUE
5201-
env_str_new2(const char *ptr)
5201+
env_str_new2(const char *ptr, rb_encoding *enc)
52025202
{
52035203
if (!ptr) return Qnil;
5204-
return env_str_new(ptr, strlen(ptr));
5204+
return env_str_new(ptr, strlen(ptr), enc);
52055205
}
52065206

52075207
static VALUE
52085208
getenv_with_lock(const char *name)
52095209
{
52105210
VALUE ret;
5211+
rb_encoding *enc = env_encoding();
52115212
ENV_LOCKING() {
52125213
const char *val = getenv(name);
5213-
ret = env_str_new2(val);
5214+
ret = env_str_new2(val, enc);
52145215
}
52155216
return ret;
52165217
}
@@ -5773,13 +5774,14 @@ env_values(void)
57735774
{
57745775
VALUE ary = rb_ary_new();
57755776

5777+
rb_encoding *enc = env_encoding();
57765778
ENV_LOCKING() {
57775779
char **env = GET_ENVIRON(environ);
57785780

57795781
while (*env) {
57805782
char *s = strchr(*env, '=');
57815783
if (s) {
5782-
rb_ary_push(ary, env_str_new2(s+1));
5784+
rb_ary_push(ary, env_str_new2(s+1, enc));
57835785
}
57845786
env++;
57855787
}
@@ -5865,14 +5867,15 @@ env_each_pair(VALUE ehash)
58655867

58665868
VALUE ary = rb_ary_new();
58675869

5870+
rb_encoding *enc = env_encoding();
58685871
ENV_LOCKING() {
58695872
char **env = GET_ENVIRON(environ);
58705873

58715874
while (*env) {
58725875
char *s = strchr(*env, '=');
58735876
if (s) {
5874-
rb_ary_push(ary, env_str_new(*env, s-*env));
5875-
rb_ary_push(ary, env_str_new2(s+1));
5877+
rb_ary_push(ary, env_str_new(*env, s-*env, enc));
5878+
rb_ary_push(ary, env_str_new2(s+1, enc));
58765879
}
58775880
env++;
58785881
}
@@ -6255,13 +6258,14 @@ env_to_a(VALUE _)
62556258
{
62566259
VALUE ary = rb_ary_new();
62576260

6261+
rb_encoding *enc = env_encoding();
62586262
ENV_LOCKING() {
62596263
char **env = GET_ENVIRON(environ);
62606264
while (*env) {
62616265
char *s = strchr(*env, '=');
62626266
if (s) {
6263-
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env),
6264-
env_str_new2(s+1)));
6267+
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env, enc),
6268+
env_str_new2(s+1, enc)));
62656269
}
62666270
env++;
62676271
}
@@ -6509,14 +6513,15 @@ env_key(VALUE dmy, VALUE value)
65096513
StringValue(value);
65106514
VALUE str = Qnil;
65116515

6516+
rb_encoding *enc = env_encoding();
65126517
ENV_LOCKING() {
65136518
char **env = GET_ENVIRON(environ);
65146519
while (*env) {
65156520
char *s = strchr(*env, '=');
65166521
if (s++) {
65176522
long len = strlen(s);
65186523
if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) {
6519-
str = env_str_new(*env, s-*env-1);
6524+
str = env_str_new(*env, s-*env-1, enc);
65206525
break;
65216526
}
65226527
}
@@ -6533,13 +6538,14 @@ env_to_hash(void)
65336538
{
65346539
VALUE hash = rb_hash_new();
65356540

6541+
rb_encoding *enc = env_encoding();
65366542
ENV_LOCKING() {
65376543
char **env = GET_ENVIRON(environ);
65386544
while (*env) {
65396545
char *s = strchr(*env, '=');
65406546
if (s) {
6541-
rb_hash_aset(hash, env_str_new(*env, s-*env),
6542-
env_str_new2(s+1));
6547+
rb_hash_aset(hash, env_str_new(*env, s-*env, enc),
6548+
env_str_new2(s+1, enc));
65436549
}
65446550
env++;
65456551
}
@@ -6684,14 +6690,15 @@ env_shift(VALUE _)
66846690
VALUE result = Qnil;
66856691
VALUE key = Qnil;
66866692

6693+
rb_encoding *enc = env_encoding();
66876694
ENV_LOCKING() {
66886695
char **env = GET_ENVIRON(environ);
66896696
if (*env) {
66906697
const char *p = *env;
66916698
char *s = strchr(p, '=');
66926699
if (s) {
6693-
key = env_str_new(p, s-p);
6694-
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)));
6700+
key = env_str_new(p, s-p, enc);
6701+
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)), enc);
66956702
result = rb_assoc_new(key, val);
66966703
}
66976704
}

test/ruby/test_transcode.rb

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,6 +2320,46 @@ def test_newline_options
23202320
assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
23212321
end
23222322

2323+
def test_ractor_lazy_load_encoding
2324+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2325+
begin;
2326+
rs = []
2327+
autoload_encodings = Encoding.list.select { |e| e.inspect.include?("(autoload)") }.freeze
2328+
7.times do
2329+
rs << Ractor.new(autoload_encodings) do |encodings|
2330+
str = "\u0300"
2331+
encodings.each do |enc|
2332+
str.encode(enc) rescue Encoding::UndefinedConversionError
2333+
end
2334+
end
2335+
end
2336+
2337+
while rs.any?
2338+
r, _obj = Ractor.select(*rs)
2339+
rs.delete(r)
2340+
end
2341+
assert rs.empty?
2342+
end;
2343+
end
2344+
2345+
def test_ractor_lazy_load_encoding_random
2346+
assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}")
2347+
begin;
2348+
rs = []
2349+
100.times do
2350+
rs << Ractor.new do
2351+
"\u0300".encode(Encoding.list.sample) rescue Encoding::UndefinedConversionError
2352+
end
2353+
end
2354+
2355+
while rs.any?
2356+
r, _obj = Ractor.select(*rs)
2357+
rs.delete(r)
2358+
end
2359+
assert rs.empty?
2360+
end;
2361+
end
2362+
23232363
private
23242364

23252365
def assert_conversion_both_ways_utf8(utf8, raw, encoding)

0 commit comments

Comments
 (0)