@@ -2019,7 +2019,12 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
20192019 if (ptr_cr_ret )
20202020 * ptr_cr_ret = ptr_cr ;
20212021
2022- if (str_encindex != ptr_encindex &&
2022+ if (rb_encoding_compat &&
2023+ ((str_encindex == rb_utf8_encindex () && ptr_encindex == rb_ascii8bit_encindex ()) ||
2024+ (str_encindex == rb_ascii8bit_encindex () && ptr_encindex == rb_utf8_encindex ()))) {
2025+ /* fall through to conditional below */
2026+ }
2027+ else if (str_encindex != ptr_encindex &&
20232028 str_cr != ENC_CODERANGE_7BIT &&
20242029 ptr_cr != ENC_CODERANGE_7BIT ) {
20252030 incompatible :
@@ -2028,7 +2033,14 @@ rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
20282033 rb_enc_name (rb_enc_from_index (ptr_encindex )));
20292034 }
20302035
2031- if (str_cr == ENC_CODERANGE_UNKNOWN ) {
2036+ if (rb_encoding_compat &&
2037+ str_encindex != ptr_encindex &&
2038+ str_cr != ENC_CODERANGE_7BIT && ptr_cr != ENC_CODERANGE_7BIT ) {
2039+ /* from fall through above */
2040+ res_encindex = rb_ascii8bit_encindex ();
2041+ res_cr = ENC_CODERANGE_VALID ;
2042+ }
2043+ else if (str_cr == ENC_CODERANGE_UNKNOWN ) {
20322044 res_encindex = str_encindex ;
20332045 res_cr = ENC_CODERANGE_UNKNOWN ;
20342046 }
@@ -2240,6 +2252,8 @@ rb_str_hash(VALUE str)
22402252 if (e && rb_enc_str_coderange (str ) == ENC_CODERANGE_7BIT ) {
22412253 e = 0 ;
22422254 }
2255+ if (rb_encoding_compat && (e == rb_utf8_encindex () || e == rb_ascii8bit_encindex ()))
2256+ e = 0 ; /* compat mode only: drop the encoding index so it is not XORed into the hash below */
22432257 return rb_memhash ((const void * )RSTRING_PTR (str ), RSTRING_LEN (str )) ^ e ;
22442258}
22452259
@@ -2294,6 +2308,11 @@ rb_str_comparable(VALUE str1, VALUE str2)
22942308 if (rb_enc_asciicompat (rb_enc_from_index (idx1 )))
22952309 return TRUE;
22962310 }
2311+ if (rb_encoding_compat &&
2312+ ((idx1 == rb_utf8_encindex () && idx2 == rb_ascii8bit_encindex ()) ||
2313+ (idx1 == rb_ascii8bit_encindex () && idx2 == rb_utf8_encindex ()))) {
2314+ return TRUE;
2315+ }
22972316 return FALSE;
22982317}
22992318
@@ -6034,7 +6053,8 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
60346053 long slen = RSTRING_LEN (spat );
60356054
60366055 if (is_broken_string (str )) {
6037- rb_raise (rb_eArgError , "invalid byte sequence in %s" , rb_enc_name (STR_ENC_GET (str )));
6056+ if (!(rb_encoding_compat && STR_ENC_GET (str ) == rb_utf8_encoding ()))
6057+ rb_raise (rb_eArgError , "invalid byte sequence in %s" , rb_enc_name (STR_ENC_GET (str )));
60386058 }
60396059 if (is_broken_string (spat )) {
60406060 rb_raise (rb_eArgError , "invalid byte sequence in %s" , rb_enc_name (STR_ENC_GET (spat )));
0 commit comments