|
5 | 5 | #define RB_UNLIKELY(cond) (cond) |
6 | 6 | #endif |
7 | 7 |
|
/* File-scope Ruby object references used by the generator. Encoding_UTF_8 is
 * filled in by Init_generator with the Encoding::UTF_8 constant and registered
 * through rb_global_variable(); it is the argument handed to String#encode. */
8 | | -static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError; |
| 8 | +static VALUE mJSON, cState, mString_Extend, eGeneratorError, eNestingError, Encoding_UTF_8; |
9 | 9 |
|
/* Method-name IDs, interned once in Init_generator with rb_intern()
 * (for example i_encode is the ID for "encode"). */
10 | | -static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend; |
| 10 | +static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode; |
11 | 11 |
|
12 | 12 | /* Converts in_string to a JSON string (without the wrapping '"' |
13 | 13 | * characters) in FBuffer out_buffer. |
@@ -735,20 +735,41 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St |
735 | 735 | fbuffer_append_char(buffer, ']'); |
736 | 736 | } |
737 | 737 |
|
/* Encoding indexes cached by Init_generator: rb_usascii_encindex(),
 * rb_utf8_encindex() and rb_ascii8bit_encindex() (the "binary" encoding). */
738 | | -static int usascii_encindex, utf8_encindex;
| 738 | +static int usascii_encindex, utf8_encindex, binary_encindex;
739 | 739 |
|
/* Predicate: returns 1 when enc_idx equals the cached US-ASCII or UTF-8
 * encoding index, and 0 for every other encoding index. */
740 | | -static int enc_utf8_compatible_p(int enc_idx)
| 740 | +static inline int enc_utf8_compatible_p(int enc_idx)
741 | 741 | {
742 | 742 | if (enc_idx == usascii_encindex) return 1;
743 | 743 | if (enc_idx == utf8_encindex) return 1;
744 | 744 | return 0;
745 | 745 | }
746 | 746 |
|
/* Returns a string suitable for emitting as UTF-8 JSON text.
 *
 *  - If str is tagged US-ASCII or UTF-8, str itself is returned untouched.
 *  - If str is tagged binary, a duplicate is re-tagged as UTF-8; when that
 *    duplicate's code range is 7BIT or VALID, the duplicate is returned.
 *  - In every remaining case (including binary strings whose bytes are not
 *    valid UTF-8) the result of calling str.encode(Encoding::UTF_8) is
 *    returned. Nothing here rescues an error raised by that call.
 *
 * The argument itself is never modified: re-tagging happens on a duplicate.
 */
| 747 | +static inline VALUE ensure_valid_encoding(VALUE str) |
| 748 | +{ |
| 749 | + int encindex = RB_ENCODING_GET(str); |
| 750 | + VALUE utf8_string; |
/* Marked unlikely: strings already tagged US-ASCII/UTF-8 skip this whole branch. */
| 751 | + if (RB_UNLIKELY(!enc_utf8_compatible_p(encindex))) { |
| 752 | + if (encindex == binary_encindex) { |
| 753 | + // For historical reasons, we silently reinterpret binary strings as UTF-8 if it would work. |
| 754 | + // TODO: Deprecate in 2.8.0 |
| 755 | + // TODO: Remove in 3.0.0 |
/* Re-tag a copy so the caller's string keeps its binary encoding. */
| 756 | + utf8_string = rb_enc_associate_index(rb_str_dup(str), utf8_encindex); |
| 757 | + switch (rb_enc_str_coderange(utf8_string)) { |
| 758 | + case ENC_CODERANGE_7BIT: |
| 759 | + case ENC_CODERANGE_VALID: |
| 760 | + return utf8_string; |
/* The break below is never reached because of the return above. */
| 761 | + break; |
| 762 | + } |
/* Any other code range falls through to the generic transcoding call below. */
| 763 | + } |
| 764 | + |
/* Transcode by calling str.encode(Encoding_UTF_8). */
| 765 | + str = rb_funcall(str, i_encode, 1, Encoding_UTF_8); |
| 766 | + } |
| 767 | + return str; |
| 768 | +} |
| 769 | + |
747 | 770 | static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) |
748 | 771 | { |
749 | | - if (!enc_utf8_compatible_p(RB_ENCODING_GET(obj))) { |
750 | | - obj = rb_str_export_to_enc(obj, rb_utf8_encoding()); |
751 | | - } |
| 772 | + obj = ensure_valid_encoding(obj); |
752 | 773 |
|
753 | 774 | fbuffer_append_char(buffer, '"'); |
754 | 775 |
|
@@ -1462,14 +1483,19 @@ void Init_generator(void) |
1462 | 1483 | VALUE mNilClass = rb_define_module_under(mGeneratorMethods, "NilClass"); |
1463 | 1484 | rb_define_method(mNilClass, "to_json", mNilClass_to_json, -1); |
1464 | 1485 |
|
| 1486 | + rb_global_variable(&Encoding_UTF_8); |
| 1487 | + Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); |
| 1488 | + |
1465 | 1489 | i_to_s = rb_intern("to_s"); |
1466 | 1490 | i_to_json = rb_intern("to_json"); |
1467 | 1491 | i_new = rb_intern("new"); |
1468 | 1492 | i_pack = rb_intern("pack"); |
1469 | 1493 | i_unpack = rb_intern("unpack"); |
1470 | 1494 | i_create_id = rb_intern("create_id"); |
1471 | 1495 | i_extend = rb_intern("extend"); |
| 1496 | + i_encode = rb_intern("encode"); |
1472 | 1497 |
|
1473 | 1498 | usascii_encindex = rb_usascii_encindex(); |
1474 | 1499 | utf8_encindex = rb_utf8_encindex(); |
| 1500 | + binary_encindex = rb_ascii8bit_encindex(); |
1475 | 1501 | } |
0 commit comments