@@ -351,6 +351,35 @@ enc_table_expand(struct enc_table *enc_table, int newsize)
351351 return newsize ;
352352}
353353
354+ /* Load an encoding using the values from base_encoding */
355+ static void
356+ enc_load_from_base (struct enc_table * enc_table , int index , rb_encoding * base_encoding )
357+ {
358+ ASSERT_vm_locking ();
359+
360+ struct rb_encoding_entry * ent = & enc_table -> list [index ];
361+
362+ if (ent -> loaded ) {
363+ return ;
364+ }
365+
366+ rb_raw_encoding * encoding = (rb_raw_encoding * )ent -> enc ;
367+ RUBY_ASSERT (encoding );
368+
369+ // FIXME: Before the base is loaded, the encoding may be accessed
370+ // concurrently by other Ractors.
371+ // We're copying all fields from base_encoding except name and
372+ // ruby_encoding_index which we preserve from the original. Since these are
373+ // the only fields other threads should read it is likely safe despite
374+ // technically being a data race.
375+ rb_raw_encoding tmp_encoding = * base_encoding ;
376+ tmp_encoding .name = encoding -> name ;
377+ tmp_encoding .ruby_encoding_index = encoding -> ruby_encoding_index ;
378+ * encoding = tmp_encoding ;
379+
380+ RUBY_ATOMIC_SET (ent -> loaded , encoding -> max_enc_len );
381+ }
382+
354383static int
355384enc_register_at (struct enc_table * enc_table , int index , const char * name , rb_encoding * base_encoding )
356385{
@@ -359,45 +388,33 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc
359388 struct rb_encoding_entry * ent = & enc_table -> list [index ];
360389 rb_raw_encoding * encoding ;
361390
362- if (ent -> loaded ) {
363- RUBY_ASSERT (ent -> base == base_encoding );
364- RUBY_ASSERT (!strcmp (name , ent -> name ));
365- return index ;
366- }
391+ RUBY_ASSERT (!ent -> loaded );
392+ RUBY_ASSERT (!ent -> name );
393+ RUBY_ASSERT (!ent -> enc );
394+ RUBY_ASSERT (!ent -> base );
367395
368- if (!valid_encoding_name_p (name )) return -1 ;
369- if (!ent -> name ) {
370- ent -> name = name = strdup (name );
371- }
372- else if (STRCASECMP (name , ent -> name )) {
373- return -1 ;
374- }
375- encoding = (rb_raw_encoding * )ent -> enc ;
376- if (!encoding ) {
377- encoding = xmalloc (sizeof (rb_encoding ));
378- }
396+ RUBY_ASSERT (valid_encoding_name_p (name ));
379397
380- rb_raw_encoding tmp_encoding ;
381- if (base_encoding ) {
382- tmp_encoding = * base_encoding ;
383- }
384- else {
385- memset (& tmp_encoding , 0 , sizeof (* ent -> enc ));
386- }
387- tmp_encoding .name = name ;
388- tmp_encoding .ruby_encoding_index = index ;
398+ ent -> name = name = strdup (name );
389399
390- // FIXME: If encoding already existed, it may be concurrently accessed
391- // It's technically invalid to write to this memory as it's read, but as all
392- // values are set up it _probably_ works.
393- * encoding = tmp_encoding ;
400+ encoding = ZALLOC (rb_raw_encoding );
401+ encoding -> name = name ;
402+ encoding -> ruby_encoding_index = index ;
394403 ent -> enc = encoding ;
395- st_insert (enc_table -> names , (st_data_t )name , (st_data_t )index );
404+
405+ if (st_insert (enc_table -> names , (st_data_t )name , (st_data_t )index )) {
406+ rb_bug ("encoding name was somehow registered twice" );
407+ }
396408
397409 enc_list_update (index , encoding );
398410
399- // max_enc_len is used to mark a fully loaded encoding.
400- RUBY_ATOMIC_SET (ent -> loaded , encoding -> max_enc_len );
411+ if (base_encoding ) {
412+ enc_load_from_base (enc_table , index , base_encoding );
413+ }
414+ else {
415+ /* it should not be loaded yet */
416+ RUBY_ASSERT (!encoding -> max_enc_len );
417+ }
401418
402419 return index ;
403420}
@@ -407,6 +424,8 @@ enc_register(struct enc_table *enc_table, const char *name, rb_encoding *encodin
407424{
408425 ASSERT_vm_locking ();
409426
427+ if (!valid_encoding_name_p (name )) return -1 ;
428+
410429 int index = enc_table -> count ;
411430
412431 enc_table -> count = enc_table_expand (enc_table , index + 1 );
@@ -447,7 +466,7 @@ rb_enc_register(const char *name, rb_encoding *encoding)
447466 index = enc_register (enc_table , name , encoding );
448467 }
449468 else if (rb_enc_autoload_p (oldenc ) || !ENC_DUMMY_P (oldenc )) {
450- enc_register_at (enc_table , index , name , encoding );
469+ enc_load_from_base (enc_table , index , encoding );
451470 }
452471 else {
453472 rb_raise (rb_eArgError , "encoding %s is already registered" , name );
@@ -564,7 +583,7 @@ enc_replicate_with_index(struct enc_table *enc_table, const char *name, rb_encod
564583 idx = enc_register (enc_table , name , origenc );
565584 }
566585 else {
567- idx = enc_register_at (enc_table , idx , name , origenc );
586+ enc_load_from_base (enc_table , idx , origenc );
568587 }
569588 if (idx >= 0 ) {
570589 set_base_encoding (enc_table , idx , origenc );
@@ -841,13 +860,11 @@ enc_autoload_body(rb_encoding *enc)
841860
842861 if (do_register ) {
843862 GLOBAL_ENC_TABLE_LOCKING (enc_table ) {
844- i = enc -> ruby_encoding_index ;
845- enc_register_at (enc_table , i & ENC_INDEX_MASK , rb_enc_name ( enc ) , base );
863+ i = ENC_TO_ENCINDEX ( enc ) ;
864+ enc_load_from_base (enc_table , i , base );
846865 RUBY_ASSERT (((rb_raw_encoding * )enc )-> ruby_encoding_index == i );
847866 }
848867 }
849-
850- i &= ENC_INDEX_MASK ;
851868 }
852869 else {
853870 i = -2 ;
0 commit comments