
Commit 90c54af

feat: enhanced serialization & type conversion process (#86)
1. Enhanced Serialization System (`sqlspec/_serialization.py`)
   - Protocol-based design with extensible serializer system
   - Byte-aware operations with `msgspec` → `orjson` → stdlib fallback
   - Overloaded functions for type-safe string/bytes output
2. Centralized Type Conversion (`sqlspec/core/type_conversion.py`)
   - Universal type detection with pre-compiled regex patterns
   - Performance optimized with quick pre-checks
   - Base classes for database-specific type handling
3. Database-Specific Enhancements
   - PostgreSQL: Cast-aware parameter preparation for JSON/JSONB
   - DuckDB: Native UUID support integration
   - BigQuery: Enhanced UUID support for standard and ADBC drivers
   - Oracle: Intelligent LOB parameter handling
1 parent cf89096 commit 90c54af
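
As a quick orientation to item 1 of the commit message, here is a usage sketch of the new byte-aware helpers in sqlspec/_serialization.py; the encode_json / decode_json signatures are taken from the diff below, while the sample payload and assertions are illustrative only:

from sqlspec._serialization import decode_json, encode_json, get_default_serializer

payload = {"id": 1, "tags": ["a", "b"]}

# String output by default; bytes output for drivers that can consume bytes directly.
as_text = encode_json(payload)
as_blob = encode_json(payload, as_bytes=True)

# decode_json accepts str or bytes; decode_bytes=False passes bytes through unchanged.
assert decode_json(as_text) == payload
assert decode_json(as_blob) == payload
assert decode_json(as_blob, decode_bytes=False) == as_blob

# The helpers delegate to the best available backend: msgspec > orjson > stdlib.
print(type(get_default_serializer()).__name__)

Returning bytes avoids an extra decode/re-encode round trip when the database driver accepts raw bytes.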

29 files changed (+2122, -512 lines changed)
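
The centralized type conversion module from item 2 (sqlspec/core/type_conversion.py) is not part of the excerpt below, so the following is only a minimal sketch of the general idea described there (a pre-compiled regex guarded by a cheap pre-check); the names _UUID_PATTERN and maybe_uuid are hypothetical and are not taken from the module:

import re
from typing import Optional
from uuid import UUID

# Hypothetical pattern, compiled once at import time rather than per call.
_UUID_PATTERN = re.compile(
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE
)


def maybe_uuid(value: str) -> Optional[UUID]:
    """Illustrative only: cheap length/character pre-check before the regex runs."""
    if len(value) != 36 or value[8] != "-":
        return None  # quick pre-check rejects most strings without touching the regex
    if _UUID_PATTERN.match(value) is None:
        return None
    return UUID(value)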

.gitignore

Lines changed: 3 additions & 0 deletions
@@ -61,3 +61,6 @@ benchmarks/
 *.duckdb
 .crush
 CRUSH.md
+*.md
+!README.md
+!CONTRIBUTING.md

pyproject.toml

Lines changed: 7 additions & 0 deletions
@@ -161,6 +161,13 @@ include = [
     "sqlspec/core/**/*.py",  # Core module
     "sqlspec/loader.py",  # Loader module

+    # === ADAPTER TYPE CONVERTERS ===
+    "sqlspec/adapters/adbc/type_converter.py",  # ADBC type converter
+    "sqlspec/adapters/bigquery/type_converter.py",  # BigQuery type converter
+    "sqlspec/adapters/duckdb/type_converter.py",  # DuckDB type converter
+    "sqlspec/adapters/oracledb/type_converter.py",  # Oracle type converter
+    "sqlspec/adapters/psqlpy/type_converter.py",  # Psqlpy type converter
+
     # === UTILITY MODULES ===
     "sqlspec/utils/text.py",  # Text utilities
     "sqlspec/utils/sync_tools.py",  # Synchronous utility functions

sqlspec/_serialization.py

Lines changed: 223 additions & 21 deletions
@@ -1,11 +1,28 @@
+"""Enhanced serialization module with byte-aware encoding and class-based architecture.
+
+Provides a Protocol-based serialization system that users can extend.
+Supports msgspec, orjson, and standard library JSON with automatic fallback.
+"""
+
+import contextlib
 import datetime
 import enum
-from typing import Any
+import json
+from abc import ABC, abstractmethod
+from typing import Any, Final, Literal, Optional, Protocol, Union, overload

-from sqlspec.typing import PYDANTIC_INSTALLED, BaseModel
+from sqlspec.typing import MSGSPEC_INSTALLED, ORJSON_INSTALLED, PYDANTIC_INSTALLED, BaseModel


 def _type_to_string(value: Any) -> str:  # pragma: no cover
+    """Convert special types to strings for JSON serialization.
+
+    Args:
+        value: Value to convert.
+
+    Returns:
+        String representation of the value.
+    """
     if isinstance(value, datetime.datetime):
         return convert_datetime_to_gmt_iso(value)
     if isinstance(value, datetime.date):
@@ -20,35 +37,206 @@ def _type_to_string(value: Any) -> str:  # pragma: no cover
         raise TypeError from exc


-try:
-    from msgspec.json import Decoder, Encoder
+class JSONSerializer(Protocol):
+    """Protocol for JSON serialization implementations.

-    encoder, decoder = Encoder(enc_hook=_type_to_string), Decoder()
-    decode_json = decoder.decode
+    Users can implement this protocol to create custom serializers.
+    """

-    def encode_json(data: Any) -> str:  # pragma: no cover
-        return encoder.encode(data).decode("utf-8")
+    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+        """Encode data to JSON.

-except ImportError:
-    try:
-        from orjson import (  # pyright: ignore[reportMissingImports]
+        Args:
+            data: Data to encode.
+            as_bytes: Whether to return bytes instead of string.
+
+        Returns:
+            JSON string or bytes depending on as_bytes parameter.
+        """
+        ...
+
+    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+        """Decode from JSON.
+
+        Args:
+            data: JSON string or bytes to decode.
+            decode_bytes: Whether to decode bytes input.
+
+        Returns:
+            Decoded Python object.
+        """
+        ...
+
+
+class BaseJSONSerializer(ABC):
+    """Base class for JSON serializers with common functionality."""
+
+    __slots__ = ()
+
+    @abstractmethod
+    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+        """Encode data to JSON."""
+        ...
+
+    @abstractmethod
+    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+        """Decode from JSON."""
+        ...
+
+
+class MsgspecSerializer(BaseJSONSerializer):
+    """Msgspec-based JSON serializer for optimal performance."""
+
+    __slots__ = ("_decoder", "_encoder")
+
+    def __init__(self) -> None:
+        """Initialize msgspec encoder and decoder."""
+        from msgspec.json import Decoder, Encoder
+
+        self._encoder: Final[Encoder] = Encoder(enc_hook=_type_to_string)
+        self._decoder: Final[Decoder] = Decoder()
+
+    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+        """Encode data using msgspec."""
+        try:
+            if as_bytes:
+                return self._encoder.encode(data)
+            return self._encoder.encode(data).decode("utf-8")
+        except (TypeError, ValueError):
+            if ORJSON_INSTALLED:
+                return OrjsonSerializer().encode(data, as_bytes=as_bytes)
+            return StandardLibSerializer().encode(data, as_bytes=as_bytes)
+
+    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+        """Decode data using msgspec."""
+        if isinstance(data, bytes):
+            if decode_bytes:
+                try:
+                    return self._decoder.decode(data)
+                except (TypeError, ValueError):
+                    if ORJSON_INSTALLED:
+                        return OrjsonSerializer().decode(data, decode_bytes=decode_bytes)
+                    return StandardLibSerializer().decode(data, decode_bytes=decode_bytes)
+            return data
+
+        try:
+            return self._decoder.decode(data.encode("utf-8"))
+        except (TypeError, ValueError):
+            if ORJSON_INSTALLED:
+                return OrjsonSerializer().decode(data, decode_bytes=decode_bytes)
+            return StandardLibSerializer().decode(data, decode_bytes=decode_bytes)
+
+
+class OrjsonSerializer(BaseJSONSerializer):
+    """Orjson-based JSON serializer with native datetime/UUID support."""
+
+    __slots__ = ()
+
+    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+        """Encode data using orjson."""
+        from orjson import (
             OPT_NAIVE_UTC,  # pyright: ignore[reportUnknownVariableType]
             OPT_SERIALIZE_NUMPY,  # pyright: ignore[reportUnknownVariableType]
             OPT_SERIALIZE_UUID,  # pyright: ignore[reportUnknownVariableType]
         )
-        from orjson import dumps as _encode_json  # pyright: ignore[reportUnknownVariableType,reportMissingImports]
-        from orjson import loads as decode_json  # type: ignore[no-redef,assignment,unused-ignore]
+        from orjson import dumps as _orjson_dumps  # pyright: ignore[reportMissingImports]
+
+        result = _orjson_dumps(
+            data, default=_type_to_string, option=OPT_SERIALIZE_NUMPY | OPT_NAIVE_UTC | OPT_SERIALIZE_UUID
+        )
+        return result if as_bytes else result.decode("utf-8")
+
+    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+        """Decode data using orjson."""
+        from orjson import loads as _orjson_loads  # pyright: ignore[reportMissingImports]
+
+        if isinstance(data, bytes):
+            if decode_bytes:
+                return _orjson_loads(data)
+            return data
+        return _orjson_loads(data)

-        def encode_json(data: Any) -> str:  # pragma: no cover
-            return _encode_json(
-                data, default=_type_to_string, option=OPT_SERIALIZE_NUMPY | OPT_NAIVE_UTC | OPT_SERIALIZE_UUID
-            ).decode("utf-8")

-    except ImportError:
-        from json import dumps as encode_json  # type: ignore[assignment]
-        from json import loads as decode_json  # type: ignore[assignment]
+class StandardLibSerializer(BaseJSONSerializer):
+    """Standard library JSON serializer as fallback."""

-__all__ = ("convert_date_to_iso", "convert_datetime_to_gmt_iso", "decode_json", "encode_json")
+    __slots__ = ()
+
+    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+        """Encode data using standard library json."""
+        json_str = json.dumps(data, default=_type_to_string)
+        return json_str.encode("utf-8") if as_bytes else json_str
+
+    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+        """Decode data using standard library json."""
+        if isinstance(data, bytes):
+            if decode_bytes:
+                return json.loads(data.decode("utf-8"))
+            return data
+        return json.loads(data)
+
+
+_default_serializer: Optional[JSONSerializer] = None
+
+
+def get_default_serializer() -> JSONSerializer:
+    """Get the default serializer based on available libraries.
+
+    Priority: msgspec > orjson > stdlib
+
+    Returns:
+        The best available JSON serializer.
+    """
+    global _default_serializer
+
+    if _default_serializer is None:
+        if MSGSPEC_INSTALLED:
+            with contextlib.suppress(ImportError):
+                _default_serializer = MsgspecSerializer()
+
+        if _default_serializer is None and ORJSON_INSTALLED:
+            with contextlib.suppress(ImportError):
+                _default_serializer = OrjsonSerializer()
+
+        if _default_serializer is None:
+            _default_serializer = StandardLibSerializer()
+
+    assert _default_serializer is not None
+    return _default_serializer
+
+
+@overload
+def encode_json(data: Any, *, as_bytes: Literal[False] = ...) -> str: ...  # pragma: no cover
+
+
+@overload
+def encode_json(data: Any, *, as_bytes: Literal[True]) -> bytes: ...  # pragma: no cover
+
+
+def encode_json(data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
+    """Encode to JSON, optionally returning bytes for optimal performance.
+
+    Args:
+        data: The data to encode.
+        as_bytes: Whether to return bytes instead of string.
+
+    Returns:
+        JSON string or bytes depending on as_bytes parameter.
+    """
+    return get_default_serializer().encode(data, as_bytes=as_bytes)
+
+
+def decode_json(data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
+    """Decode from JSON string or bytes efficiently.
+
+    Args:
+        data: JSON string or bytes to decode.
+        decode_bytes: Whether to decode bytes input.
+
+    Returns:
+        Decoded Python object.
+    """
+    return get_default_serializer().decode(data, decode_bytes=decode_bytes)


 def convert_datetime_to_gmt_iso(dt: datetime.datetime) -> str:  # pragma: no cover
@@ -75,3 +263,17 @@ def convert_date_to_iso(dt: datetime.date) -> str:  # pragma: no cover
         The ISO formatted date string.
     """
     return dt.isoformat()
+
+
+__all__ = (
+    "BaseJSONSerializer",
+    "JSONSerializer",
+    "MsgspecSerializer",
+    "OrjsonSerializer",
+    "StandardLibSerializer",
+    "convert_date_to_iso",
+    "convert_datetime_to_gmt_iso",
+    "decode_json",
+    "encode_json",
+    "get_default_serializer",
+)
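
Since the JSONSerializer Protocol above is described as the extension point for custom serializers, here is a minimal sketch of a user-defined implementation; only BaseJSONSerializer and the encode/decode signatures come from the diff above, while the class name and sort-keys behavior are hypothetical:

import json
from typing import Any, Union

from sqlspec._serialization import BaseJSONSerializer


class SortedKeySerializer(BaseJSONSerializer):
    """Hypothetical custom serializer: stdlib-backed, with sorted object keys."""

    __slots__ = ()

    def encode(self, data: Any, *, as_bytes: bool = False) -> Union[str, bytes]:
        # Deterministic key order; non-JSON types fall back to str().
        text = json.dumps(data, sort_keys=True, default=str)
        return text.encode("utf-8") if as_bytes else text

    def decode(self, data: Union[str, bytes], *, decode_bytes: bool = True) -> Any:
        # Mirror the built-in serializers: bytes pass through when decode_bytes=False.
        if isinstance(data, bytes):
            return json.loads(data.decode("utf-8")) if decode_bytes else data
        return json.loads(data)

An instance of such a class satisfies the JSONSerializer Protocol; note that get_default_serializer in this commit only selects among the built-in msgspec, orjson, and stdlib implementations.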

sqlspec/_typing.py

Lines changed: 9 additions & 0 deletions
@@ -177,6 +177,14 @@ class UnsetTypeStub(enum.Enum):
     MSGSPEC_INSTALLED = False  # pyright: ignore[reportConstantRedefinition]


+try:
+    import orjson  # noqa: F401
+
+    ORJSON_INSTALLED = True  # pyright: ignore[reportConstantRedefinition]
+except ImportError:
+    ORJSON_INSTALLED = False  # pyright: ignore[reportConstantRedefinition]
+
+
 # Always define stub type for DTOData
 @runtime_checkable
 class DTODataStub(Protocol[T]):
@@ -621,6 +629,7 @@ async def insert_returning(self, conn: Any, query_name: str, sql: str, parameter
     "NUMPY_INSTALLED",
     "OBSTORE_INSTALLED",
     "OPENTELEMETRY_INSTALLED",
+    "ORJSON_INSTALLED",
     "PGVECTOR_INSTALLED",
     "PROMETHEUS_INSTALLED",
     "PYARROW_INSTALLED",