Skip to content

Commit ec8327d

Browse files
committed
Allow encodings to be autoloaded through transcoding functions
Make sure VM lock is not held when calling `load_transcoder_entry`, as that causes deadlock inside ractors. `String#encode` now works inside ractors, among others. Atomic load the rb_encoding_list Without this, wbcheck would sometimes hit a missing write barrier. Co-authored-by: John Hawthorn <[email protected]> Hold VM lock when iterating over global_enc_table.names This st_table can be inserted into at runtime when autoloading encodings. minor optimization when calling Encoding.list
1 parent 61fff8a commit ec8327d

File tree

4 files changed

+158
-95
lines changed

4 files changed

+158
-95
lines changed

encoding.c

Lines changed: 65 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "ruby/util.h"
3030
#include "ruby_assert.h"
3131
#include "vm_sync.h"
32+
#include "ruby_atomic.h"
3233

3334
#ifndef ENC_DEBUG
3435
#define ENC_DEBUG 0
@@ -144,10 +145,14 @@ enc_list_update(int index, rb_raw_encoding *encoding)
144145
{
145146
RUBY_ASSERT(index < ENCODING_LIST_CAPA);
146147

147-
VALUE list = rb_encoding_list;
148+
VALUE list = RUBY_ATOMIC_VALUE_LOAD(rb_encoding_list);
149+
148150
if (list && NIL_P(rb_ary_entry(list, index))) {
151+
VALUE new_list = rb_ary_dup(list);
152+
RBASIC_CLEAR_CLASS(new_list);
149153
/* initialize encoding data */
150-
rb_ary_store(list, index, enc_new(encoding));
154+
rb_ary_store(new_list, index, enc_new(encoding));
155+
RUBY_ATOMIC_VALUE_SET(rb_encoding_list, new_list);
151156
}
152157
}
153158

@@ -157,7 +162,7 @@ enc_list_lookup(int idx)
157162
VALUE list, enc = Qnil;
158163

159164
if (idx < ENCODING_LIST_CAPA) {
160-
list = rb_encoding_list;
165+
list = RUBY_ATOMIC_VALUE_LOAD(rb_encoding_list);
161166
RUBY_ASSERT(list);
162167
enc = rb_ary_entry(list, idx);
163168
}
@@ -258,6 +263,7 @@ must_encindex(int index)
258263
int
259264
rb_to_encoding_index(VALUE enc)
260265
{
266+
ASSERT_vm_unlocking(); // can load encoding, so must not hold VM lock
261267
int idx;
262268
const char *name;
263269

@@ -667,15 +673,15 @@ int
667673
rb_enc_alias(const char *alias, const char *orig)
668674
{
669675
int idx, r;
676+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
677+
enc_check_addable(enc_table, alias); // can raise
678+
}
679+
680+
idx = rb_enc_find_index(orig);
681+
if (idx < 0) return -1;
670682

671683
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
672-
enc_check_addable(enc_table, alias);
673-
if ((idx = rb_enc_find_index(orig)) < 0) {
674-
r = -1;
675-
}
676-
else {
677-
r = enc_alias(enc_table, alias, idx);
678-
}
684+
r = enc_alias(enc_table, alias, idx);
679685
}
680686

681687
return r;
@@ -742,6 +748,7 @@ int rb_require_internal_silent(VALUE fname);
742748
static int
743749
load_encoding(const char *name)
744750
{
751+
ASSERT_vm_unlocking();
745752
VALUE enclib = rb_sprintf("enc/%s.so", name);
746753
VALUE debug = ruby_debug;
747754
VALUE errinfo;
@@ -757,7 +764,7 @@ load_encoding(const char *name)
757764
enclib = rb_fstring(enclib);
758765
ruby_debug = Qfalse;
759766
errinfo = rb_errinfo();
760-
loaded = rb_require_internal_silent(enclib);
767+
loaded = rb_require_internal_silent(enclib); // must run without VM_LOCK
761768
ruby_debug = debug;
762769
rb_set_errinfo(errinfo);
763770

@@ -781,6 +788,7 @@ enc_autoload_body(rb_encoding *enc)
781788
{
782789
rb_encoding *base;
783790
int i = 0;
791+
ASSERT_vm_unlocking();
784792

785793
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
786794
base = enc_table->list[ENC_TO_ENCINDEX(enc)].base;
@@ -792,30 +800,32 @@ enc_autoload_body(rb_encoding *enc)
792800
}
793801
} while (enc_table->list[i].enc != base && (++i, 1));
794802
}
803+
}
804+
795805

796-
if (i != -1) {
797-
if (base) {
798-
bool do_register = true;
799-
if (rb_enc_autoload_p(base)) {
800-
if (rb_enc_autoload(base) < 0) {
801-
do_register = false;
802-
i = -1;
803-
}
806+
if (i != -1) {
807+
if (base) {
808+
bool do_register = true;
809+
if (rb_enc_autoload_p(base)) {
810+
if (rb_enc_autoload(base) < 0) {
811+
do_register = false;
812+
i = -1;
804813
}
814+
}
805815

806-
i = enc->ruby_encoding_index;
807-
if (do_register) {
816+
if (do_register) {
817+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
818+
i = enc->ruby_encoding_index;
808819
enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
809820
((rb_raw_encoding *)enc)->ruby_encoding_index = i;
810821
}
811-
812-
i &= ENC_INDEX_MASK;
813-
}
814-
else {
815-
i = -2;
816822
}
817-
}
818823

824+
i &= ENC_INDEX_MASK;
825+
}
826+
else {
827+
i = -2;
828+
}
819829
}
820830

821831
return i;
@@ -824,6 +834,7 @@ enc_autoload_body(rb_encoding *enc)
824834
int
825835
rb_enc_autoload(rb_encoding *enc)
826836
{
837+
ASSERT_vm_unlocking();
827838
int i = enc_autoload_body(enc);
828839
if (i == -2) {
829840
i = load_encoding(rb_enc_name(enc));
@@ -844,6 +855,7 @@ int
844855
rb_enc_find_index(const char *name)
845856
{
846857
int i;
858+
ASSERT_vm_unlocking(); // it needs to be unlocked so it can call `load_encoding` if necessary
847859
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
848860
i = enc_registered(enc_table, name);
849861
}
@@ -1019,7 +1031,6 @@ rb_enc_associate_index(VALUE obj, int idx)
10191031
rb_encoding *enc;
10201032
int oldidx, oldtermlen, termlen;
10211033

1022-
/* enc_check_capable(obj);*/
10231034
rb_check_frozen(obj);
10241035
oldidx = rb_enc_get_index(obj);
10251036
if (oldidx == idx)
@@ -1355,7 +1366,10 @@ enc_names(VALUE self)
13551366

13561367
args[0] = (VALUE)rb_to_encoding_index(self);
13571368
args[1] = rb_ary_new2(0);
1358-
st_foreach(global_enc_table.names, enc_names_i, (st_data_t)args);
1369+
1370+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
1371+
st_foreach(enc_table->names, enc_names_i, (st_data_t)args);
1372+
}
13591373
return args[1];
13601374
}
13611375

@@ -1380,8 +1394,9 @@ enc_names(VALUE self)
13801394
static VALUE
13811395
enc_list(VALUE klass)
13821396
{
1383-
VALUE ary = rb_ary_new2(0);
1384-
rb_ary_replace(ary, rb_encoding_list);
1397+
VALUE ary = rb_ary_new2(ENCODING_LIST_CAPA);
1398+
VALUE list = RUBY_ATOMIC_VALUE_LOAD(rb_encoding_list);
1399+
rb_ary_replace(ary, list);
13851400
return ary;
13861401
}
13871402

@@ -1526,6 +1541,9 @@ int rb_locale_charmap_index(void);
15261541
int
15271542
rb_locale_encindex(void)
15281543
{
1544+
// `rb_locale_charmap_index` can call `enc_find_index`, which can
1545+
// load an encoding. This needs to be done without VM lock held.
1546+
ASSERT_vm_unlocking();
15291547
int idx = rb_locale_charmap_index();
15301548

15311549
if (idx < 0) idx = ENCINDEX_UTF_8;
@@ -1584,6 +1602,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
15841602
/* Already set */
15851603
overridden = TRUE;
15861604

1605+
if (!NIL_P(encoding)) {
1606+
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
1607+
}
1608+
15871609
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
15881610
if (NIL_P(encoding)) {
15891611
def->index = -1;
@@ -1854,8 +1876,11 @@ rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
18541876
static VALUE
18551877
rb_enc_name_list(VALUE klass)
18561878
{
1857-
VALUE ary = rb_ary_new2(global_enc_table.names->num_entries);
1858-
st_foreach(global_enc_table.names, rb_enc_name_list_i, (st_data_t)ary);
1879+
VALUE ary;
1880+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
1881+
ary = rb_ary_new2(enc_table->names->num_entries);
1882+
st_foreach(enc_table->names, rb_enc_name_list_i, (st_data_t)ary);
1883+
}
18591884
return ary;
18601885
}
18611886

@@ -1901,7 +1926,9 @@ rb_enc_aliases(VALUE klass)
19011926
aliases[0] = rb_hash_new();
19021927
aliases[1] = rb_ary_new();
19031928

1904-
st_foreach(global_enc_table.names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1929+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
1930+
st_foreach(enc_table->names, rb_enc_aliases_enc_i, (st_data_t)aliases);
1931+
}
19051932

19061933
return aliases[0];
19071934
}
@@ -1969,9 +1996,9 @@ Init_Encoding(void)
19691996

19701997
struct enc_table *enc_table = &global_enc_table;
19711998

1999+
rb_gc_register_address(&rb_encoding_list);
19722000
list = rb_encoding_list = rb_ary_new2(ENCODING_LIST_CAPA);
19732001
RBASIC_CLEAR_CLASS(list);
1974-
rb_vm_register_global_object(list);
19752002

19762003
for (i = 0; i < enc_table->count; ++i) {
19772004
rb_ary_push(list, enc_new(enc_table->list[i].enc));
@@ -2003,5 +2030,7 @@ Init_encodings(void)
20032030
void
20042031
rb_enc_foreach_name(int (*func)(st_data_t name, st_data_t idx, st_data_t arg), st_data_t arg)
20052032
{
2006-
st_foreach(global_enc_table.names, func, arg);
2033+
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
2034+
st_foreach(enc_table->names, func, arg);
2035+
}
20072036
}

hash.c

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5192,25 +5192,26 @@ env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
51925192
}
51935193

51945194
static VALUE
5195-
env_str_new(const char *ptr, long len)
5195+
env_str_new(const char *ptr, long len, rb_encoding *enc)
51965196
{
5197-
return env_enc_str_new(ptr, len, env_encoding());
5197+
return env_enc_str_new(ptr, len, enc);
51985198
}
51995199

52005200
static VALUE
5201-
env_str_new2(const char *ptr)
5201+
env_str_new2(const char *ptr, rb_encoding *enc)
52025202
{
52035203
if (!ptr) return Qnil;
5204-
return env_str_new(ptr, strlen(ptr));
5204+
return env_str_new(ptr, strlen(ptr), enc);
52055205
}
52065206

52075207
static VALUE
52085208
getenv_with_lock(const char *name)
52095209
{
52105210
VALUE ret;
5211+
rb_encoding *enc = env_encoding();
52115212
ENV_LOCKING() {
52125213
const char *val = getenv(name);
5213-
ret = env_str_new2(val);
5214+
ret = env_str_new2(val, enc);
52145215
}
52155216
return ret;
52165217
}
@@ -5773,13 +5774,14 @@ env_values(void)
57735774
{
57745775
VALUE ary = rb_ary_new();
57755776

5777+
rb_encoding *enc = env_encoding();
57765778
ENV_LOCKING() {
57775779
char **env = GET_ENVIRON(environ);
57785780

57795781
while (*env) {
57805782
char *s = strchr(*env, '=');
57815783
if (s) {
5782-
rb_ary_push(ary, env_str_new2(s+1));
5784+
rb_ary_push(ary, env_str_new2(s+1, enc));
57835785
}
57845786
env++;
57855787
}
@@ -5865,14 +5867,15 @@ env_each_pair(VALUE ehash)
58655867

58665868
VALUE ary = rb_ary_new();
58675869

5870+
rb_encoding *enc = env_encoding();
58685871
ENV_LOCKING() {
58695872
char **env = GET_ENVIRON(environ);
58705873

58715874
while (*env) {
58725875
char *s = strchr(*env, '=');
58735876
if (s) {
5874-
rb_ary_push(ary, env_str_new(*env, s-*env));
5875-
rb_ary_push(ary, env_str_new2(s+1));
5877+
rb_ary_push(ary, env_str_new(*env, s-*env, enc));
5878+
rb_ary_push(ary, env_str_new2(s+1, enc));
58765879
}
58775880
env++;
58785881
}
@@ -6255,13 +6258,14 @@ env_to_a(VALUE _)
62556258
{
62566259
VALUE ary = rb_ary_new();
62576260

6261+
rb_encoding *enc = env_encoding();
62586262
ENV_LOCKING() {
62596263
char **env = GET_ENVIRON(environ);
62606264
while (*env) {
62616265
char *s = strchr(*env, '=');
62626266
if (s) {
6263-
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env),
6264-
env_str_new2(s+1)));
6267+
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env, enc),
6268+
env_str_new2(s+1, enc)));
62656269
}
62666270
env++;
62676271
}
@@ -6509,14 +6513,15 @@ env_key(VALUE dmy, VALUE value)
65096513
StringValue(value);
65106514
VALUE str = Qnil;
65116515

6516+
rb_encoding *enc = env_encoding();
65126517
ENV_LOCKING() {
65136518
char **env = GET_ENVIRON(environ);
65146519
while (*env) {
65156520
char *s = strchr(*env, '=');
65166521
if (s++) {
65176522
long len = strlen(s);
65186523
if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) {
6519-
str = env_str_new(*env, s-*env-1);
6524+
str = env_str_new(*env, s-*env-1, enc);
65206525
break;
65216526
}
65226527
}
@@ -6533,13 +6538,14 @@ env_to_hash(void)
65336538
{
65346539
VALUE hash = rb_hash_new();
65356540

6541+
rb_encoding *enc = env_encoding();
65366542
ENV_LOCKING() {
65376543
char **env = GET_ENVIRON(environ);
65386544
while (*env) {
65396545
char *s = strchr(*env, '=');
65406546
if (s) {
6541-
rb_hash_aset(hash, env_str_new(*env, s-*env),
6542-
env_str_new2(s+1));
6547+
rb_hash_aset(hash, env_str_new(*env, s-*env, enc),
6548+
env_str_new2(s+1, enc));
65436549
}
65446550
env++;
65456551
}
@@ -6684,14 +6690,15 @@ env_shift(VALUE _)
66846690
VALUE result = Qnil;
66856691
VALUE key = Qnil;
66866692

6693+
rb_encoding *enc = env_encoding();
66876694
ENV_LOCKING() {
66886695
char **env = GET_ENVIRON(environ);
66896696
if (*env) {
66906697
const char *p = *env;
66916698
char *s = strchr(p, '=');
66926699
if (s) {
6693-
key = env_str_new(p, s-p);
6694-
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)));
6700+
key = env_str_new(p, s-p, enc);
6701+
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)), enc);
66956702
result = rb_assoc_new(key, val);
66966703
}
66976704
}

0 commit comments

Comments
 (0)