1+ """Enhanced serialization module with byte-aware encoding and class-based architecture.
2+
3+ Provides a Protocol-based serialization system that users can extend.
4+ Supports msgspec, orjson, and standard library JSON with automatic fallback.
5+ """
6+
7+ import contextlib
18import datetime
29import enum
3- from typing import Any
10+ import json
11+ from abc import ABC , abstractmethod
12+ from typing import Any , Final , Literal , Optional , Protocol , Union , overload
413
5- from sqlspec .typing import PYDANTIC_INSTALLED , BaseModel
14+ from sqlspec .typing import MSGSPEC_INSTALLED , ORJSON_INSTALLED , PYDANTIC_INSTALLED , BaseModel
615
716
817def _type_to_string (value : Any ) -> str : # pragma: no cover
18+ """Convert special types to strings for JSON serialization.
19+
20+ Args:
21+ value: Value to convert.
22+
23+ Returns:
24+ String representation of the value.
25+ """
926 if isinstance (value , datetime .datetime ):
1027 return convert_datetime_to_gmt_iso (value )
1128 if isinstance (value , datetime .date ):
@@ -20,35 +37,206 @@ def _type_to_string(value: Any) -> str: # pragma: no cover
2037 raise TypeError from exc
2138
2239
23- try :
24- from msgspec . json import Decoder , Encoder
class JSONSerializer(Protocol):
    """Structural interface every JSON serializer in this module satisfies.

    Any object exposing compatible ``encode`` and ``decode`` methods can be
    used wherever a ``JSONSerializer`` is expected; inheriting from this
    class is not required.
    """

    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        """Serialize ``data`` to JSON.

        Args:
            data: The object to serialize.
            as_bytes: When True the result is returned as ``bytes``;
                otherwise it is returned as ``str``.

        Returns:
            The JSON document, as ``str`` or ``bytes`` according to ``as_bytes``.
        """
        ...

    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        """Deserialize a JSON document.

        Args:
            data: The JSON text or bytes to parse.
            decode_bytes: Whether ``bytes`` input should be parsed. The
                serializers bundled in this module hand ``bytes`` input back
                unchanged when this is False.

        Returns:
            The decoded Python object.
        """
        ...
69+
70+
class BaseJSONSerializer(ABC):
    """Abstract parent of the concrete JSON serializers in this module.

    Subclasses must override both ``encode`` and ``decode``; the class cannot
    be instantiated until they do. ``__slots__`` is empty so that subclasses
    declaring their own slots do not gain a per-instance ``__dict__``.
    """

    __slots__ = ()

    @abstractmethod
    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        """Serialize ``data`` to JSON.

        Args:
            data: The object to serialize.
            as_bytes: Return ``bytes`` when True, ``str`` otherwise.

        Returns:
            The JSON document as ``str`` or ``bytes``.
        """

    @abstractmethod
    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        """Deserialize a JSON document.

        Args:
            data: The JSON text or bytes to parse.
            decode_bytes: Whether ``bytes`` input should be parsed.

        Returns:
            The decoded Python object.
        """
85+
86+
class MsgspecSerializer(BaseJSONSerializer):
    """JSON serializer backed by ``msgspec.json``.

    If msgspec raises ``TypeError`` or ``ValueError`` while encoding or
    decoding, the same operation is retried once with another serializer:
    ``OrjsonSerializer`` when ``ORJSON_INSTALLED`` is true, otherwise
    ``StandardLibSerializer``. Errors raised by that retry propagate.
    """

    __slots__ = ("_decoder", "_encoder")

    def __init__(self) -> None:
        """Build the reusable msgspec encoder and decoder.

        msgspec is imported here rather than at module import time, so the
        import error surfaces only when this class is instantiated.
        """
        from msgspec.json import Decoder, Encoder

        self._encoder: Final[Encoder] = Encoder(enc_hook=_type_to_string)
        self._decoder: Final[Decoder] = Decoder()

    @staticmethod
    def _fallback() -> BaseJSONSerializer:
        """Return the serializer to retry with after a msgspec failure."""
        if ORJSON_INSTALLED:
            return OrjsonSerializer()
        return StandardLibSerializer()

    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        """Serialize ``data`` to JSON with msgspec.

        Args:
            data: The object to serialize.
            as_bytes: Return the raw encoded ``bytes`` when True; otherwise
                decode them to ``str`` as UTF-8.

        Returns:
            The JSON document as ``str`` or ``bytes``.
        """
        try:
            encoded = self._encoder.encode(data)
            return encoded if as_bytes else encoded.decode("utf-8")
        except (TypeError, ValueError):
            return self._fallback().encode(data, as_bytes=as_bytes)

    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        """Deserialize a JSON document with msgspec.

        Args:
            data: The JSON text or bytes to parse.
            decode_bytes: When False, ``bytes`` input is returned unchanged
                without being parsed. Has no effect on ``str`` input.

        Returns:
            The decoded Python object, or ``data`` itself for ``bytes`` input
            when ``decode_bytes`` is False.
        """
        if isinstance(data, bytes) and not decode_bytes:
            return data

        try:
            # The str -> bytes conversion stays inside the try so an encoding
            # failure takes the same fallback path as a decoding failure.
            raw = data if isinstance(data, bytes) else data.encode("utf-8")
            return self._decoder.decode(raw)
        except (TypeError, ValueError):
            return self._fallback().decode(data, decode_bytes=decode_bytes)
128+
129+
class OrjsonSerializer(BaseJSONSerializer):
    """JSON serializer backed by orjson.

    orjson is imported inside each method, so constructing an instance never
    triggers the import.
    """

    __slots__ = ()

    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        """Serialize ``data`` to JSON with ``orjson.dumps``.

        Args:
            data: The object to serialize.
            as_bytes: Return orjson's ``bytes`` output directly when True;
                otherwise decode it to ``str`` as UTF-8.

        Returns:
            The JSON document as ``str`` or ``bytes``.
        """
        from orjson import (
            OPT_NAIVE_UTC,  # pyright: ignore[reportUnknownVariableType]
            OPT_SERIALIZE_NUMPY,  # pyright: ignore[reportUnknownVariableType]
            OPT_SERIALIZE_UUID,  # pyright: ignore[reportUnknownVariableType]
        )
        from orjson import dumps as _orjson_dumps  # pyright: ignore[reportMissingImports]

        options = OPT_SERIALIZE_NUMPY | OPT_NAIVE_UTC | OPT_SERIALIZE_UUID
        # _type_to_string is supplied as orjson's ``default`` callback.
        payload = _orjson_dumps(data, default=_type_to_string, option=options)
        if as_bytes:
            return payload
        return payload.decode("utf-8")

    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        """Deserialize a JSON document with ``orjson.loads``.

        Args:
            data: The JSON text or bytes to parse.
            decode_bytes: When False, ``bytes`` input is returned unchanged
                without being parsed. Has no effect on ``str`` input.

        Returns:
            The decoded Python object, or ``data`` itself for ``bytes`` input
            when ``decode_bytes`` is False.
        """
        from orjson import loads as _orjson_loads  # pyright: ignore[reportMissingImports]

        if isinstance(data, bytes) and not decode_bytes:
            return data
        return _orjson_loads(data)
41158
42- def encode_json (data : Any ) -> str : # pragma: no cover
43- return _encode_json (
44- data , default = _type_to_string , option = OPT_SERIALIZE_NUMPY | OPT_NAIVE_UTC | OPT_SERIALIZE_UUID
45- ).decode ("utf-8" )
46159
47- except ImportError :
48- from json import dumps as encode_json # type: ignore[assignment]
49- from json import loads as decode_json # type: ignore[assignment]
class StandardLibSerializer(BaseJSONSerializer):
    """JSON serializer built on the standard library ``json`` module."""

    __slots__ = ()

    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        """Serialize ``data`` to JSON with ``json.dumps``.

        Args:
            data: The object to serialize.
            as_bytes: Return the JSON text encoded as UTF-8 ``bytes`` when
                True; otherwise return the text itself.

        Returns:
            The JSON document as ``str`` or ``bytes``.
        """
        # _type_to_string is supplied as json's ``default`` callback.
        text = json.dumps(data, default=_type_to_string)
        if as_bytes:
            return text.encode("utf-8")
        return text

    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        """Deserialize a JSON document with ``json.loads``.

        Args:
            data: The JSON text or bytes to parse.
            decode_bytes: When False, ``bytes`` input is returned unchanged
                without being parsed. Has no effect on ``str`` input.

        Returns:
            The decoded Python object, or ``data`` itself for ``bytes`` input
            when ``decode_bytes`` is False.
        """
        if not isinstance(data, bytes):
            return json.loads(data)
        if not decode_bytes:
            return data
        # Bytes are decoded as UTF-8 before being handed to the parser.
        return json.loads(data.decode("utf-8"))
177+
178+
# Module-level cache for the serializer chosen by get_default_serializer().
_default_serializer: Optional[JSONSerializer] = None


def get_default_serializer() -> JSONSerializer:
    """Return the module-wide default serializer, choosing it on first use.

    Candidates are tried in the order msgspec, orjson, standard library. A
    candidate is attempted only when its ``*_INSTALLED`` flag is true, and an
    ``ImportError`` raised while constructing it moves on to the next one.
    The selection is cached in ``_default_serializer`` and reused afterwards.

    Returns:
        The cached default serializer.
    """
    global _default_serializer

    if _default_serializer is not None:
        return _default_serializer

    chosen: Optional[JSONSerializer] = None

    if MSGSPEC_INSTALLED:
        with contextlib.suppress(ImportError):
            chosen = MsgspecSerializer()

    if chosen is None and ORJSON_INSTALLED:
        with contextlib.suppress(ImportError):
            chosen = OrjsonSerializer()

    if chosen is None:
        chosen = StandardLibSerializer()

    _default_serializer = chosen
    return chosen
206+
207+
@overload
def encode_json(data: Any, *, as_bytes: Literal[False] = ...) -> str: ...  # pragma: no cover


@overload
def encode_json(data: Any, *, as_bytes: Literal[True]) -> bytes: ...  # pragma: no cover


def encode_json(data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
    """Serialize ``data`` to JSON using the default serializer.

    Args:
        data: The object to serialize.
        as_bytes: Return ``bytes`` when True, ``str`` otherwise.

    Returns:
        The JSON document as ``str`` or ``bytes`` according to ``as_bytes``.
    """
    serializer = get_default_serializer()
    return serializer.encode(data, as_bytes=as_bytes)
227+
228+
def decode_json(data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
    """Deserialize a JSON document using the default serializer.

    Args:
        data: The JSON text or bytes to parse.
        decode_bytes: Forwarded to the serializer's ``decode``; controls
            whether ``bytes`` input is parsed.

    Returns:
        Whatever the default serializer's ``decode`` returns for ``data``.
    """
    serializer = get_default_serializer()
    return serializer.decode(data, decode_bytes=decode_bytes)
52240
53241
54242def convert_datetime_to_gmt_iso (dt : datetime .datetime ) -> str : # pragma: no cover
@@ -75,3 +263,17 @@ def convert_date_to_iso(dt: datetime.date) -> str: # pragma: no cover
75263 The ISO formatted date string.
76264 """
77265 return dt .isoformat ()
266+
267+
# Names exported by ``from <module> import *``.
__all__ = (
    "BaseJSONSerializer",
    "JSONSerializer",
    "MsgspecSerializer",
    "OrjsonSerializer",
    "StandardLibSerializer",
    "convert_date_to_iso",
    "convert_datetime_to_gmt_iso",
    "decode_json",
    "encode_json",
    "get_default_serializer",
)
0 commit comments