Skip to content

[testing CI] Fix str encode in ractors #691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions depend
Original file line number Diff line number Diff line change
Expand Up @@ -17738,6 +17738,7 @@ transcode.$(OBJEXT): $(top_srcdir)/internal/transcode.h
transcode.$(OBJEXT): $(top_srcdir)/internal/variable.h
transcode.$(OBJEXT): $(top_srcdir)/internal/warnings.h
transcode.$(OBJEXT): {$(VPATH)}assert.h
transcode.$(OBJEXT): {$(VPATH)}atomic.h
transcode.$(OBJEXT): {$(VPATH)}backward/2/assume.h
transcode.$(OBJEXT): {$(VPATH)}backward/2/attributes.h
transcode.$(OBJEXT): {$(VPATH)}backward/2/bool.h
Expand Down Expand Up @@ -17909,6 +17910,7 @@ transcode.$(OBJEXT): {$(VPATH)}internal/xmalloc.h
transcode.$(OBJEXT): {$(VPATH)}missing.h
transcode.$(OBJEXT): {$(VPATH)}onigmo.h
transcode.$(OBJEXT): {$(VPATH)}oniguruma.h
transcode.$(OBJEXT): {$(VPATH)}ruby_atomic.h
transcode.$(OBJEXT): {$(VPATH)}shape.h
transcode.$(OBJEXT): {$(VPATH)}st.h
transcode.$(OBJEXT): {$(VPATH)}subst.h
Expand Down
77 changes: 57 additions & 20 deletions encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ static struct enc_table {
st_table *names;
} global_enc_table;

/*
 * Cached name strings of the three built-in encodings.  Each pointer is
 * assigned by ENC_REGISTER() (from rb_enc_name()) and is later compared
 * against the requested name in rb_enc_find_index().
 * NOTE(review): they are NULL until ENC_REGISTER() has run -- confirm that
 * no lookup can happen before that point.
 */
static const char *string_UTF_8;
static const char *string_US_ASCII;
static const char *string_ASCII_8BIT;

static int
enc_names_free_i(st_data_t name, st_data_t idx, st_data_t args)
{
Expand Down Expand Up @@ -258,6 +262,7 @@ must_encindex(int index)
int
rb_to_encoding_index(VALUE enc)
{
ASSERT_vm_unlocking(); // can load encoding, so must not hold VM lock
int idx;
const char *name;

Expand Down Expand Up @@ -668,9 +673,11 @@ rb_enc_alias(const char *alias, const char *orig)
{
int idx, r;

idx = rb_enc_find_index(orig);

GLOBAL_ENC_TABLE_LOCKING(enc_table) {
enc_check_addable(enc_table, alias);
if ((idx = rb_enc_find_index(orig)) < 0) {
if (idx < 0) {
r = -1;
}
else {
Expand Down Expand Up @@ -707,7 +714,8 @@ rb_enc_init(struct enc_table *enc_table)
enc_table->names = st_init_strcasetable_with_size(ENCODING_LIST_CAPA);
}
#define OnigEncodingASCII_8BIT OnigEncodingASCII
#define ENC_REGISTER(enc) enc_register_at(enc_table, ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
/*
 * Registers the built-in encoding `enc` at index ENCINDEX_<enc> and caches
 * its name in the file-scope pointer string_<enc>.
 *
 * The two statements are wrapped in do { ... } while (0) so that the macro
 * expands to exactly one statement; without the wrapper only the first
 * assignment would be governed by an unbraced `if`/`else`/loop.
 */
#define ENC_REGISTER(enc) do { \
    string_##enc = rb_enc_name(&OnigEncoding##enc); \
    enc_register_at(enc_table, ENCINDEX_##enc, string_##enc, &OnigEncoding##enc); \
} while (0)
ENC_REGISTER(ASCII_8BIT);
ENC_REGISTER(UTF_8);
ENC_REGISTER(US_ASCII);
Expand Down Expand Up @@ -742,6 +750,7 @@ int rb_require_internal_silent(VALUE fname);
static int
load_encoding(const char *name)
{
ASSERT_vm_unlocking();
VALUE enclib = rb_sprintf("enc/%s.so", name);
VALUE debug = ruby_debug;
VALUE errinfo;
Expand All @@ -757,7 +766,7 @@ load_encoding(const char *name)
enclib = rb_fstring(enclib);
ruby_debug = Qfalse;
errinfo = rb_errinfo();
loaded = rb_require_internal_silent(enclib);
loaded = rb_require_internal_silent(enclib); // must run without VM_LOCK
ruby_debug = debug;
rb_set_errinfo(errinfo);

Expand All @@ -781,6 +790,7 @@ enc_autoload_body(rb_encoding *enc)
{
rb_encoding *base;
int i = 0;
ASSERT_vm_unlocking();

GLOBAL_ENC_TABLE_LOCKING(enc_table) {
base = enc_table->list[ENC_TO_ENCINDEX(enc)].base;
Expand All @@ -792,30 +802,32 @@ enc_autoload_body(rb_encoding *enc)
}
} while (enc_table->list[i].enc != base && (++i, 1));
}
}


if (i != -1) {
if (base) {
bool do_register = true;
if (rb_enc_autoload_p(base)) {
if (rb_enc_autoload(base) < 0) {
do_register = false;
i = -1;
}
if (i != -1) {
if (base) {
bool do_register = true;
if (rb_enc_autoload_p(base)) {
if (rb_enc_autoload(base) < 0) {
do_register = false;
i = -1;
}
}

i = enc->ruby_encoding_index;
if (do_register) {
if (do_register) {
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
i = enc->ruby_encoding_index;
enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
((rb_raw_encoding *)enc)->ruby_encoding_index = i;
}

i &= ENC_INDEX_MASK;
}
else {
i = -2;
}
}

i &= ENC_INDEX_MASK;
}
else {
i = -2;
}
}

return i;
Expand All @@ -824,6 +836,7 @@ enc_autoload_body(rb_encoding *enc)
int
rb_enc_autoload(rb_encoding *enc)
{
ASSERT_vm_unlocking();
int i = enc_autoload_body(enc);
if (i == -2) {
i = load_encoding(rb_enc_name(enc));
Expand All @@ -843,6 +856,24 @@ rb_enc_autoload_p(rb_encoding *enc)
int
rb_enc_find_index(const char *name)
{
ASSERT_vm_unlocking(); // it needs to be unlocked so it can call `load_encoding` if necessary
size_t input_len = strlen(name);
switch(input_len) {
case 5:
if (STRCASECMP(name, string_UTF_8) == 0) {
return ENCINDEX_UTF_8;
}
case 8:
if (STRCASECMP(name, string_US_ASCII) == 0) {
return ENCINDEX_US_ASCII;
}
case 10:
if (STRCASECMP(name, string_ASCII_8BIT) == 0) {
return ENCINDEX_ASCII_8BIT;
}
default:
break;
}
int i;
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
i = enc_registered(enc_table, name);
Expand Down Expand Up @@ -1019,7 +1050,6 @@ rb_enc_associate_index(VALUE obj, int idx)
rb_encoding *enc;
int oldidx, oldtermlen, termlen;

/* enc_check_capable(obj);*/
rb_check_frozen(obj);
oldidx = rb_enc_get_index(obj);
if (oldidx == idx)
Expand Down Expand Up @@ -1526,6 +1556,9 @@ int rb_locale_charmap_index(void);
int
rb_locale_encindex(void)
{
// `rb_locale_charmap_index` can call `enc_find_index`, which can
// load an encoding. This needs to be done without VM lock held.
ASSERT_vm_unlocking();
int idx = rb_locale_charmap_index();

if (idx < 0) idx = ENCINDEX_UTF_8;
Expand Down Expand Up @@ -1584,6 +1617,10 @@ enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const cha
/* Already set */
overridden = TRUE;

if (!NIL_P(encoding)) {
enc_check_encoding(encoding); // loads it if necessary. Needs to be done outside of VM lock.
}

GLOBAL_ENC_TABLE_LOCKING(enc_table) {
if (NIL_P(encoding)) {
def->index = -1;
Expand Down
37 changes: 22 additions & 15 deletions hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -5192,25 +5192,26 @@ env_enc_str_new(const char *ptr, long len, rb_encoding *enc)
}

static VALUE
env_str_new(const char *ptr, long len)
env_str_new(const char *ptr, long len, rb_encoding *enc)
{
return env_enc_str_new(ptr, len, env_encoding());
return env_enc_str_new(ptr, len, enc);
}

static VALUE
env_str_new2(const char *ptr)
env_str_new2(const char *ptr, rb_encoding *enc)
{
if (!ptr) return Qnil;
return env_str_new(ptr, strlen(ptr));
return env_str_new(ptr, strlen(ptr), enc);
}

static VALUE
getenv_with_lock(const char *name)
{
VALUE ret;
rb_encoding *enc = env_encoding();
ENV_LOCKING() {
const char *val = getenv(name);
ret = env_str_new2(val);
ret = env_str_new2(val, enc);
}
return ret;
}
Expand Down Expand Up @@ -5773,13 +5774,14 @@ env_values(void)
{
VALUE ary = rb_ary_new();

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);

while (*env) {
char *s = strchr(*env, '=');
if (s) {
rb_ary_push(ary, env_str_new2(s+1));
rb_ary_push(ary, env_str_new2(s+1, enc));
}
env++;
}
Expand Down Expand Up @@ -5865,14 +5867,15 @@ env_each_pair(VALUE ehash)

VALUE ary = rb_ary_new();

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);

while (*env) {
char *s = strchr(*env, '=');
if (s) {
rb_ary_push(ary, env_str_new(*env, s-*env));
rb_ary_push(ary, env_str_new2(s+1));
rb_ary_push(ary, env_str_new(*env, s-*env, enc));
rb_ary_push(ary, env_str_new2(s+1, enc));
}
env++;
}
Expand Down Expand Up @@ -6255,13 +6258,14 @@ env_to_a(VALUE _)
{
VALUE ary = rb_ary_new();

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);
while (*env) {
char *s = strchr(*env, '=');
if (s) {
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env),
env_str_new2(s+1)));
rb_ary_push(ary, rb_assoc_new(env_str_new(*env, s-*env, enc),
env_str_new2(s+1, enc)));
}
env++;
}
Expand Down Expand Up @@ -6509,14 +6513,15 @@ env_key(VALUE dmy, VALUE value)
StringValue(value);
VALUE str = Qnil;

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);
while (*env) {
char *s = strchr(*env, '=');
if (s++) {
long len = strlen(s);
if (RSTRING_LEN(value) == len && strncmp(s, RSTRING_PTR(value), len) == 0) {
str = env_str_new(*env, s-*env-1);
str = env_str_new(*env, s-*env-1, enc);
break;
}
}
Expand All @@ -6533,13 +6538,14 @@ env_to_hash(void)
{
VALUE hash = rb_hash_new();

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);
while (*env) {
char *s = strchr(*env, '=');
if (s) {
rb_hash_aset(hash, env_str_new(*env, s-*env),
env_str_new2(s+1));
rb_hash_aset(hash, env_str_new(*env, s-*env, enc),
env_str_new2(s+1, enc));
}
env++;
}
Expand Down Expand Up @@ -6684,14 +6690,15 @@ env_shift(VALUE _)
VALUE result = Qnil;
VALUE key = Qnil;

rb_encoding *enc = env_encoding();
ENV_LOCKING() {
char **env = GET_ENVIRON(environ);
if (*env) {
const char *p = *env;
char *s = strchr(p, '=');
if (s) {
key = env_str_new(p, s-p);
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)));
key = env_str_new(p, s-p, enc);
VALUE val = env_str_new2(getenv(RSTRING_PTR(key)), enc);
result = rb_assoc_new(key, val);
}
}
Expand Down
1 change: 1 addition & 0 deletions include/ruby/internal/encoding/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ enum ruby_encoding_consts {
#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX /**< @old{RUBY_ENCODING_INLINE_MAX} */
#define ENCODING_SHIFT RUBY_ENCODING_SHIFT /**< @old{RUBY_ENCODING_SHIFT} */
#define ENCODING_MASK RUBY_ENCODING_MASK /**< @old{RUBY_ENCODING_MASK} */
#define ENCODING_NAMELEN_MAX 63 /**< NOTE(review): presumably the maximum accepted length of an encoding name -- confirm against its users. */

/**
* Destructively assigns the passed encoding to the passed object. The object
Expand Down
8 changes: 8 additions & 0 deletions include/ruby/st.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,14 @@ CONSTFUNC(st_index_t rb_st_hash_start(st_index_t h));

void rb_hash_bulk_insert_into_st_table(long, const VALUE *, VALUE);

/*
 * "Managed" st_table helpers: the table is referred to through a VALUE
 * handle instead of a raw st_table pointer.
 *
 * The three create functions each take a `capa` argument and return the
 * handle; lookup/insert/add_direct take the handle plus st_data_t key and
 * value arguments; dup takes an existing handle and returns a VALUE.
 * NOTE(review): `capa` is presumably the initial capacity, and the int
 * results presumably follow st_lookup()/st_insert() -- confirm against the
 * implementation.
 */
VALUE rb_managed_st_table_create_numtable(size_t capa);
VALUE rb_managed_st_table_create_strtable(size_t capa);
VALUE rb_managed_st_table_create_strcasetable(size_t capa);
int rb_managed_st_table_lookup(VALUE tbl, st_data_t key, st_data_t *value);
int rb_managed_st_table_insert(VALUE tbl, st_data_t key, st_data_t value);
void rb_managed_st_table_add_direct(VALUE tbl, st_data_t key, st_data_t value);
VALUE rb_managed_st_table_dup(VALUE old_table);

RUBY_SYMBOL_EXPORT_END

#if defined(__cplusplus)
Expand Down
Loading
Loading