Skip to content

Commit fc5a710

Browse files
authored
Fix \u-style Unicode escape strings in Python (#110)
1 parent 366d16a commit fc5a710

File tree

3 files changed

+29
-0
lines changed

3 files changed

+29
-0
lines changed

crates/jiter-python/tests/test_jiter.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import json
12
from decimal import Decimal
23

34
import jiter
@@ -239,3 +240,17 @@ def test_lossless_floats_int():
239240
v = jiter.from_json(b'123', lossless_floats=True)
240241
assert isinstance(v, int)
241242
assert v == 123
243+
244+
245+
def test_unicode_roundtrip():
246+
original = ['中文']
247+
json_data = json.dumps(original).encode()
248+
assert jiter.from_json(json_data) == original
249+
assert json.loads(json_data) == original
250+
251+
252+
def test_unicode_roundtrip_ensure_ascii():
253+
original = {'name': '中文'}
254+
json_data = json.dumps(original, ensure_ascii=False).encode()
255+
assert jiter.from_json(json_data, cache_mode=False) == original
256+
assert json.loads(json_data) == original

crates/jiter/src/string_decoder.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,7 @@ fn decode_to_tape<'t, 'j>(
118118
b't' => tape.push(b'\t'),
119119
b'u' => {
120120
let (c, new_index) = parse_escape(data, index)?;
121+
ascii_only = false;
121122
index = new_index;
122123
tape.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
123124
}

crates/jiter/tests/main.rs

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1594,3 +1594,16 @@ fn jiter_partial_string() {
15941594
JiterErrorType::JsonError(JsonErrorType::EofWhileParsingList)
15951595
);
15961596
}
1597+
1598+
#[test]
1599+
fn test_unicode_roundtrip() {
1600+
// '"中文"'
1601+
let json_bytes = b"\"\\u4e2d\\u6587\"";
1602+
let value = JsonValue::parse(json_bytes, false).unwrap();
1603+
let cow = match value {
1604+
JsonValue::Str(s) => s,
1605+
_ => panic!("expected string"),
1606+
};
1607+
assert_eq!(cow, "中文");
1608+
assert!(matches!(cow, Cow::Owned(_)));
1609+
}

0 commit comments

Comments
 (0)