Skip to content

Commit f048f77

Browse files
committed
Extract enc_load_from_base from enc_register_at
Previously we would sometimes call enc_register_at several times in order to update the encoding values after the base encoding may have been loaded. This updates enc_register_at to only be used via enc_register, when an actual new encoding at a new index is being registered. Other callers (which in all cases found the index by the name matching) now call enc_load_from_base which is only responsibly for loading the encoding from the base values.
1 parent 7c51ce5 commit f048f77

File tree

1 file changed

+55
-38
lines changed

1 file changed

+55
-38
lines changed

encoding.c

Lines changed: 55 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,35 @@ enc_table_expand(struct enc_table *enc_table, int newsize)
351351
return newsize;
352352
}
353353

354+
/* Load an encoding using the values from base_encoding */
355+
static void
356+
enc_load_from_base(struct enc_table *enc_table, int index, rb_encoding *base_encoding)
357+
{
358+
ASSERT_vm_locking();
359+
360+
struct rb_encoding_entry *ent = &enc_table->list[index];
361+
362+
if (ent->loaded) {
363+
return;
364+
}
365+
366+
rb_raw_encoding *encoding = (rb_raw_encoding *)ent->enc;
367+
RUBY_ASSERT(encoding);
368+
369+
// FIXME: Before the base is loaded, the encoding may be accessed
370+
// concurrently by other Ractors.
371+
// We're copying all fields from base_encoding except name and
372+
// ruby_encoding_index which we preserve from the original. Since these are
373+
// the only fields other threads should read it is likely safe despite
374+
// technically being a data race.
375+
rb_raw_encoding tmp_encoding = *base_encoding;
376+
tmp_encoding.name = encoding->name;
377+
tmp_encoding.ruby_encoding_index = encoding->ruby_encoding_index;
378+
*encoding = tmp_encoding;
379+
380+
RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len);
381+
}
382+
354383
static int
355384
enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_encoding *base_encoding)
356385
{
@@ -359,45 +388,33 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc
359388
struct rb_encoding_entry *ent = &enc_table->list[index];
360389
rb_raw_encoding *encoding;
361390

362-
if (ent->loaded) {
363-
RUBY_ASSERT(ent->base == base_encoding);
364-
RUBY_ASSERT(!strcmp(name, ent->name));
365-
return index;
366-
}
391+
RUBY_ASSERT(!ent->loaded);
392+
RUBY_ASSERT(!ent->name);
393+
RUBY_ASSERT(!ent->enc);
394+
RUBY_ASSERT(!ent->base);
367395

368-
if (!valid_encoding_name_p(name)) return -1;
369-
if (!ent->name) {
370-
ent->name = name = strdup(name);
371-
}
372-
else if (STRCASECMP(name, ent->name)) {
373-
return -1;
374-
}
375-
encoding = (rb_raw_encoding *)ent->enc;
376-
if (!encoding) {
377-
encoding = xmalloc(sizeof(rb_encoding));
378-
}
396+
RUBY_ASSERT(valid_encoding_name_p(name));
379397

380-
rb_raw_encoding tmp_encoding;
381-
if (base_encoding) {
382-
tmp_encoding = *base_encoding;
383-
}
384-
else {
385-
memset(&tmp_encoding, 0, sizeof(*ent->enc));
386-
}
387-
tmp_encoding.name = name;
388-
tmp_encoding.ruby_encoding_index = index;
398+
ent->name = name = strdup(name);
389399

390-
// FIXME: If encoding already existed, it may be concurrently accessed
391-
// It's technically invalid to write to this memory as it's read, but as all
392-
// values are set up it _probably_ works.
393-
*encoding = tmp_encoding;
400+
encoding = ZALLOC(rb_raw_encoding);
401+
encoding->name = name;
402+
encoding->ruby_encoding_index = index;
394403
ent->enc = encoding;
395-
st_insert(enc_table->names, (st_data_t)name, (st_data_t)index);
404+
405+
if (st_insert(enc_table->names, (st_data_t)name, (st_data_t)index)) {
406+
rb_bug("encoding name was somehow registered twice");
407+
}
396408

397409
enc_list_update(index, encoding);
398410

399-
// max_enc_len is used to mark a fully loaded encoding.
400-
RUBY_ATOMIC_SET(ent->loaded, encoding->max_enc_len);
411+
if (base_encoding) {
412+
enc_load_from_base(enc_table, index, base_encoding);
413+
}
414+
else {
415+
/* it should not be loaded yet */
416+
RUBY_ASSERT(!encoding->max_enc_len);
417+
}
401418

402419
return index;
403420
}
@@ -407,6 +424,8 @@ enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encodin
407424
{
408425
ASSERT_vm_locking();
409426

427+
if (!valid_encoding_name_p(name)) return -1;
428+
410429
int index = enc_table->count;
411430

412431
enc_table->count = enc_table_expand(enc_table, index + 1);
@@ -447,7 +466,7 @@ rb_enc_register(const char *name, rb_encoding *encoding)
447466
index = enc_register(enc_table, name, encoding);
448467
}
449468
else if (rb_enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
450-
enc_register_at(enc_table, index, name, encoding);
469+
enc_load_from_base(enc_table, index, encoding);
451470
}
452471
else {
453472
rb_raise(rb_eArgError, "encoding %s is already registered", name);
@@ -564,7 +583,7 @@ enc_replicate_with_index(struct enc_table *enc_table, const char *name, rb_encod
564583
idx = enc_register(enc_table, name, origenc);
565584
}
566585
else {
567-
idx = enc_register_at(enc_table, idx, name, origenc);
586+
enc_load_from_base(enc_table, idx, origenc);
568587
}
569588
if (idx >= 0) {
570589
set_base_encoding(enc_table, idx, origenc);
@@ -841,13 +860,11 @@ enc_autoload_body(rb_encoding *enc)
841860

842861
if (do_register) {
843862
GLOBAL_ENC_TABLE_LOCKING(enc_table) {
844-
i = enc->ruby_encoding_index;
845-
enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base);
863+
i = ENC_TO_ENCINDEX(enc);
864+
enc_load_from_base(enc_table, i, base);
846865
RUBY_ASSERT(((rb_raw_encoding *)enc)->ruby_encoding_index == i);
847866
}
848867
}
849-
850-
i &= ENC_INDEX_MASK;
851868
}
852869
else {
853870
i = -2;

0 commit comments

Comments
 (0)