1
1
use std:: borrow:: Cow ;
2
2
use std:: fmt:: Debug ;
3
+ use std:: io:: { self , Write } ;
3
4
4
5
use pyo3:: exceptions:: PyTypeError ;
5
6
use pyo3:: prelude:: * ;
@@ -9,7 +10,7 @@ use pyo3::{intern, PyTraverseError, PyVisit};
9
10
10
11
use enum_dispatch:: enum_dispatch;
11
12
use serde:: Serialize ;
12
- use serde_json:: ser:: PrettyFormatter ;
13
+ use serde_json:: ser:: { Formatter , PrettyFormatter } ;
13
14
14
15
use crate :: build_tools:: py_schema_err;
15
16
use crate :: build_tools:: py_schema_error_type;
@@ -432,6 +433,87 @@ impl Serialize for PydanticSerializer<'_> {
432
433
}
433
434
}
434
435
436
+ struct EscapeNonAsciiFormatter ;
437
+
438
+ impl Formatter for EscapeNonAsciiFormatter {
439
+ fn write_string_fragment < W : ?Sized + Write > ( & mut self , writer : & mut W , fragment : & str ) -> io:: Result < ( ) > {
440
+ let mut input = fragment;
441
+
442
+ while let Some ( ( idx, non_ascii_char) ) = input. chars ( ) . enumerate ( ) . find ( |( _, c) | !c. is_ascii ( ) ) {
443
+ if idx > 0 {
444
+ // write all ascii characters before the non-ascii one
445
+ let ascii_run = & input[ ..idx] ;
446
+ writer. write_all ( ascii_run. as_bytes ( ) ) . unwrap ( ) ;
447
+ }
448
+
449
+ let codepoint = non_ascii_char as u32 ;
450
+ if codepoint < 0xFFFF {
451
+ // write basic codepoint as single escape
452
+ write ! ( writer, "\\ u{codepoint:04x}" ) . unwrap ( ) ;
453
+ } else {
454
+ // encode extended plane character as utf16 pair
455
+ for escape in non_ascii_char. encode_utf16 ( & mut [ 0 ; 2 ] ) {
456
+ write ! ( writer, "\\ u{escape:04x}" ) . unwrap ( ) ;
457
+ }
458
+ }
459
+
460
+ input = & input[ ( idx + non_ascii_char. len_utf8 ( ) ) ..] ;
461
+ }
462
+
463
+ // write any ascii trailer
464
+ writer. write_all ( input. as_bytes ( ) ) ?;
465
+ Ok ( ( ) )
466
+ }
467
+ }
468
+
469
+ struct EscapeNonAsciiPrettyFormatter < ' a > {
470
+ pretty : PrettyFormatter < ' a > ,
471
+ escape_non_ascii : EscapeNonAsciiFormatter ,
472
+ }
473
+
474
+ impl < ' a > EscapeNonAsciiPrettyFormatter < ' a > {
475
+ pub fn with_indent ( indent : & ' a [ u8 ] ) -> Self {
476
+ Self {
477
+ pretty : PrettyFormatter :: with_indent ( indent) ,
478
+ escape_non_ascii : EscapeNonAsciiFormatter ,
479
+ }
480
+ }
481
+ }
482
+
483
/// Generates a method that forwards to the same-named method on a field of `self`.
///
/// * `defer!(field, method)` produces `fn method(&mut self, writer)`.
/// * `defer!(field, method, Ty)` produces `fn method(&mut self, writer, arg: Ty)`.
///
/// Both forms are generic over an `io::Write` writer and return `io::Result<()>`.
macro_rules! defer {
    // Forwarding method that takes only the writer.
    ($field:ident, $method:ident) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W) -> io::Result<()> {
            self.$field.$method(writer)
        }
    };
    // Forwarding method that takes the writer plus one extra argument.
    ($field:ident, $method:ident, $arg_ty:ty) => {
        fn $method<W: ?Sized + io::Write>(&mut self, writer: &mut W, arg: $arg_ty) -> io::Result<()> {
            self.$field.$method(writer, arg)
        }
    };
}
501
+
502
+ #[ allow( clippy:: needless_lifetimes) ]
503
+ impl Formatter for EscapeNonAsciiPrettyFormatter < ' _ > {
504
+ defer ! ( escape_non_ascii, write_string_fragment, & str ) ;
505
+ defer ! ( pretty, begin_array) ;
506
+ defer ! ( pretty, end_array) ;
507
+ defer ! ( pretty, begin_array_value, bool ) ;
508
+ defer ! ( pretty, end_array_value) ;
509
+ defer ! ( pretty, begin_object) ;
510
+ defer ! ( pretty, end_object) ;
511
+ defer ! ( pretty, begin_object_key, bool ) ;
512
+ defer ! ( pretty, end_object_key) ;
513
+ defer ! ( pretty, begin_object_value) ;
514
+ defer ! ( pretty, end_object_value) ;
515
+ }
516
+
435
517
#[ allow( clippy:: too_many_arguments) ]
436
518
pub ( crate ) fn to_json_bytes (
437
519
value : & Bound < ' _ , PyAny > ,
@@ -440,25 +522,40 @@ pub(crate) fn to_json_bytes(
440
522
exclude : Option < & Bound < ' _ , PyAny > > ,
441
523
extra : & Extra ,
442
524
indent : Option < usize > ,
525
+ ensure_ascii : bool ,
443
526
expected_json_size : usize ,
444
527
) -> PyResult < Vec < u8 > > {
445
528
let serializer = PydanticSerializer :: new ( value, serializer, include, exclude, extra) ;
446
529
447
530
let writer: Vec < u8 > = Vec :: with_capacity ( expected_json_size) ;
448
- let bytes = match indent {
449
- Some ( indent) => {
531
+
532
+ let bytes = match ( indent, ensure_ascii) {
533
+ ( Some ( indent) , true ) => {
534
+ let indent = vec ! [ b' ' ; indent] ;
535
+ let formatter = EscapeNonAsciiPrettyFormatter :: with_indent ( & indent) ;
536
+ let mut ser = PythonSerializer :: with_formatter ( writer, formatter) ;
537
+ serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
538
+ ser. into_inner ( )
539
+ }
540
+ ( Some ( indent) , false ) => {
450
541
let indent = vec ! [ b' ' ; indent] ;
451
542
let formatter = PrettyFormatter :: with_indent ( & indent) ;
452
543
let mut ser = PythonSerializer :: with_formatter ( writer, formatter) ;
453
544
serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
454
545
ser. into_inner ( )
455
546
}
456
- None => {
547
+ ( None , true ) => {
548
+ let mut ser = PythonSerializer :: with_formatter ( writer, EscapeNonAsciiFormatter ) ;
549
+ serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
550
+ ser. into_inner ( )
551
+ }
552
+ ( None , false ) => {
457
553
let mut ser = PythonSerializer :: new ( writer) ;
458
554
serializer. serialize ( & mut ser) . map_err ( se_err_py_err) ?;
459
555
ser. into_inner ( )
460
556
}
461
557
} ;
558
+
462
559
Ok ( bytes)
463
560
}
464
561
0 commit comments