1+ use ahash:: AHashSet ;
12use std:: marker:: PhantomData ;
23
34use pyo3:: exceptions:: PyValueError ;
@@ -21,6 +22,8 @@ use crate::JsonErrorType;
2122/// - `json_data`: The JSON data to parse.
2223/// - `allow_inf_nan`: Whether to allow `(-)Infinity` and `NaN` values.
/// - `cache_mode`: Which strings to cache (all, keys only, or none) to avoid constructing new Python objects,
///   this should have a significant improvement on performance but increases memory slightly.
/// - `allow_partial`: Whether to allow partial JSON data.
/// - `catch_duplicate_keys`: Whether to catch duplicate keys in objects.
2528///
2629/// # Returns
@@ -32,11 +35,27 @@ pub fn python_parse<'py>(
3235 allow_inf_nan : bool ,
3336 cache_mode : StringCacheMode ,
3437 allow_partial : bool ,
38+ catch_duplicate_keys : bool ,
3539) -> JsonResult < Bound < ' py , PyAny > > {
40+ macro_rules! ppp {
41+ ( $string_cache: ident, $key_check: ident) => {
42+ PythonParser :: <$string_cache, $key_check>:: parse( py, json_data, allow_inf_nan, allow_partial)
43+ } ;
44+ }
45+
3646 match cache_mode {
37- StringCacheMode :: All => PythonParser :: < StringCacheAll > :: parse ( py, json_data, allow_inf_nan, allow_partial) ,
38- StringCacheMode :: Keys => PythonParser :: < StringCacheKeys > :: parse ( py, json_data, allow_inf_nan, allow_partial) ,
39- StringCacheMode :: None => PythonParser :: < StringNoCache > :: parse ( py, json_data, allow_inf_nan, allow_partial) ,
47+ StringCacheMode :: All => match catch_duplicate_keys {
48+ true => ppp ! ( StringCacheAll , DuplicateKeyCheck ) ,
49+ false => ppp ! ( StringCacheAll , NoopKeyCheck ) ,
50+ } ,
51+ StringCacheMode :: Keys => match catch_duplicate_keys {
52+ true => ppp ! ( StringCacheKeys , DuplicateKeyCheck ) ,
53+ false => ppp ! ( StringCacheKeys , NoopKeyCheck ) ,
54+ } ,
55+ StringCacheMode :: None => match catch_duplicate_keys {
56+ true => ppp ! ( StringNoCache , DuplicateKeyCheck ) ,
57+ false => ppp ! ( StringNoCache , NoopKeyCheck ) ,
58+ } ,
4059 }
4160}
4261
@@ -45,16 +64,17 @@ pub fn map_json_error(json_data: &[u8], json_error: &JsonError) -> PyErr {
4564 PyValueError :: new_err ( json_error. description ( json_data) )
4665}
4766
// State held while parsing JSON bytes into a Python object (see `parse`, which returns
// `Bound<PyAny>`). `StringCache` and `KeyCheck` are compile-time strategy parameters: the
// parser only calls associated functions / constructs values of those types, so the struct
// carries them as `PhantomData` markers rather than storing instances.
// NOTE(review): this span is a diff rendering — the `-` line is the previous header and the
// `+` line is its replacement that adds the `KeyCheck` parameter.
48- struct PythonParser < ' j , StringCache > {
67+ struct PythonParser < ' j , StringCache , KeyCheck > {
// Marker for the string-caching strategy type; holds no data.
4968 _string_cache : PhantomData < StringCache > ,
// Marker for the key-checking strategy type; holds no data.
69+ _key_check : PhantomData < KeyCheck > ,
// Underlying JSON parser, constructed from the input bytes via `Parser::new(json_data)`.
5070 parser : Parser < ' j > ,
// Scratch `Tape` handed to the parser's object key decoding calls; starts as `Tape::default()`.
5171 tape : Tape ,
// Remaining nesting budget; initialised to `DEFAULT_RECURSION_LIMIT` in `parse`.
5272 recursion_limit : u8 ,
// Presumably whether `(-)Infinity` and `NaN` are accepted — mirrors the `parse` argument of
// the same name; TODO confirm against the part of `parse` not shown here.
5373 allow_inf_nan : bool ,
// Presumably whether partial JSON data is accepted — mirrors the `parse` argument of the
// same name; TODO confirm against the part of `parse` not shown here.
5474 allow_partial : bool ,
5575}
5676
57- impl < ' j , StringCache : StringMaybeCache > PythonParser < ' j , StringCache > {
77+ impl < ' j , StringCache : StringMaybeCache , KeyCheck : MaybeKeyCheck > PythonParser < ' j , StringCache , KeyCheck > {
5878 fn parse < ' py > (
5979 py : Python < ' py > ,
6080 json_data : & [ u8 ] ,
@@ -63,6 +83,7 @@ impl<'j, StringCache: StringMaybeCache> PythonParser<'j, StringCache> {
6383 ) -> JsonResult < Bound < ' py , PyAny > > {
6484 let mut slf = PythonParser {
6585 _string_cache : PhantomData :: < StringCache > ,
86+ _key_check : PhantomData :: < KeyCheck > ,
6687 parser : Parser :: new ( json_data) ,
6788 tape : Tape :: default ( ) ,
6889 recursion_limit : DEFAULT_RECURSION_LIMIT ,
@@ -166,13 +187,18 @@ impl<'j, StringCache: StringMaybeCache> PythonParser<'j, StringCache> {
166187 panic ! ( "PyDict_SetItem failed" )
167188 }
168189 } ;
190+ let mut check_keys = KeyCheck :: default ( ) ;
169191 if let Some ( first_key) = self . parser . object_first :: < StringDecoder > ( & mut self . tape ) ? {
170- let first_key = StringCache :: get_key ( py, first_key. as_str ( ) , first_key. ascii_only ( ) ) ;
192+ let first_key_s = first_key. as_str ( ) ;
193+ check_keys. check ( first_key_s, self . parser . index ) ?;
194+ let first_key = StringCache :: get_key ( py, first_key_s, first_key. ascii_only ( ) ) ;
171195 let peek = self . parser . peek ( ) ?;
172196 let first_value = self . _check_take_value ( py, peek) ?;
173197 set_item ( first_key, first_value) ;
174198 while let Some ( key) = self . parser . object_step :: < StringDecoder > ( & mut self . tape ) ? {
175- let key = StringCache :: get_key ( py, key. as_str ( ) , key. ascii_only ( ) ) ;
199+ let key_s = key. as_str ( ) ;
200+ check_keys. check ( key_s, self . parser . index ) ?;
201+ let key = StringCache :: get_key ( py, key_s, key. ascii_only ( ) ) ;
176202 let peek = self . parser . peek ( ) ?;
177203 let value = self . _check_take_value ( py, peek) ?;
178204 set_item ( key, value) ;
@@ -209,3 +235,29 @@ impl<'j, StringCache: StringMaybeCache> PythonParser<'j, StringCache> {
209235 r
210236 }
211237}
238+
239+ trait MaybeKeyCheck : Default {
240+ fn check ( & mut self , key : & str , index : usize ) -> JsonResult < ( ) > ;
241+ }
242+
243+ #[ derive( Default ) ]
244+ struct NoopKeyCheck ;
245+
246+ impl MaybeKeyCheck for NoopKeyCheck {
247+ fn check ( & mut self , _key : & str , _index : usize ) -> JsonResult < ( ) > {
248+ Ok ( ( ) )
249+ }
250+ }
251+
252+ #[ derive( Default ) ]
253+ struct DuplicateKeyCheck ( AHashSet < String > ) ;
254+
255+ impl MaybeKeyCheck for DuplicateKeyCheck {
256+ fn check ( & mut self , key : & str , index : usize ) -> JsonResult < ( ) > {
257+ if self . 0 . insert ( key. to_owned ( ) ) {
258+ Ok ( ( ) )
259+ } else {
260+ Err ( JsonError :: new ( JsonErrorType :: DuplicateKey ( key. to_owned ( ) ) , index) )
261+ }
262+ }
263+ }
0 commit comments