Skip to content

Commit eaa258d

Browse files
authored
feat: improve schema conversion and caching optimizations (#92)
Implement multiple phases of performance improvements, including schema conversion caching, parameter extraction optimizations, and LRU caching for type converters. These changes collectively enhance execution speed and reduce memory usage across various database adapters. All tests pass.
1 parent eb63bfc commit eaa258d

File tree

11 files changed

+790
-518
lines changed

11 files changed

+790
-518
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ repos:
1717
- id: mixed-line-ending
1818
- id: trailing-whitespace
1919
- repo: https://github.com/charliermarsh/ruff-pre-commit
20-
rev: "v0.13.2"
20+
rev: "v0.13.3"
2121
hooks:
2222
- id: ruff
2323
args: ["--fix"]

sqlspec/adapters/adbc/type_converter.py

Lines changed: 44 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,65 @@
55
MySQL, BigQuery, Snowflake).
66
"""
77

8-
from typing import Any
8+
from functools import lru_cache
9+
from typing import Any, Final
910

1011
from sqlspec.core.type_conversion import BaseTypeConverter
1112
from sqlspec.utils.serializers import to_json
1213

14+
ADBC_SPECIAL_CHARS: Final[frozenset[str]] = frozenset({"{", "[", "-", ":", "T", "."})
15+
1316

1417
class ADBCTypeConverter(BaseTypeConverter):
1518
"""ADBC-specific type converter with dialect awareness.
1619
1720
Extends the base BaseTypeConverter with ADBC multi-backend functionality
1821
including dialect-specific type handling for different database systems.
22+
Includes per-instance LRU cache for improved performance.
1923
"""
2024

21-
__slots__ = ("dialect",)
25+
__slots__ = ("_convert_cache", "dialect")
2226

23-
def __init__(self, dialect: str) -> None:
24-
"""Initialize with dialect-specific configuration.
27+
def __init__(self, dialect: str, cache_size: int = 5000) -> None:
28+
"""Initialize with dialect-specific configuration and conversion cache.
2529
2630
Args:
2731
dialect: Target database dialect (postgres, sqlite, duckdb, etc.)
32+
cache_size: Maximum number of string values to cache (default: 5000)
2833
"""
2934
super().__init__()
3035
self.dialect = dialect.lower()
3136

37+
@lru_cache(maxsize=cache_size)
38+
def _cached_convert(value: str) -> Any:
39+
if not value or not any(c in value for c in ADBC_SPECIAL_CHARS):
40+
return value
41+
detected_type = self.detect_type(value)
42+
if detected_type:
43+
try:
44+
if self.dialect in {"postgres", "postgresql"}:
45+
if detected_type in {"uuid", "interval"}:
46+
return self.convert_value(value, detected_type)
47+
elif self.dialect == "duckdb":
48+
if detected_type == "uuid":
49+
return self.convert_value(value, detected_type)
50+
elif self.dialect == "sqlite":
51+
if detected_type == "uuid":
52+
return str(value)
53+
elif self.dialect == "bigquery":
54+
if detected_type == "uuid":
55+
return self.convert_value(value, detected_type)
56+
elif self.dialect in {"mysql", "snowflake"} and detected_type in {"uuid", "json"}:
57+
return self.convert_value(value, detected_type)
58+
return self.convert_value(value, detected_type)
59+
except Exception:
60+
return value
61+
return value
62+
63+
self._convert_cache = _cached_convert
64+
3265
def convert_if_detected(self, value: Any) -> Any:
33-
"""Convert value with dialect-specific handling.
66+
"""Convert value with dialect-specific handling (cached).
3467
3568
Args:
3669
value: Value to potentially convert.
@@ -40,37 +73,7 @@ def convert_if_detected(self, value: Any) -> Any:
4073
"""
4174
if not isinstance(value, str):
4275
return value
43-
44-
if not any(c in value for c in ["{", "[", "-", ":", "T"]):
45-
return value
46-
47-
detected_type = self.detect_type(value)
48-
if detected_type:
49-
try:
50-
if self.dialect in {"postgres", "postgresql"}:
51-
if detected_type in {"uuid", "interval"}:
52-
return self.convert_value(value, detected_type)
53-
54-
elif self.dialect == "duckdb":
55-
if detected_type == "uuid":
56-
return self.convert_value(value, detected_type)
57-
58-
elif self.dialect == "sqlite":
59-
if detected_type == "uuid":
60-
return str(value)
61-
62-
elif self.dialect == "bigquery":
63-
if detected_type == "uuid":
64-
return self.convert_value(value, detected_type)
65-
66-
elif self.dialect in {"mysql", "snowflake"} and detected_type in {"uuid", "json"}:
67-
return self.convert_value(value, detected_type)
68-
69-
return self.convert_value(value, detected_type)
70-
except Exception:
71-
return value
72-
73-
return value
76+
return self._convert_cache(value)
7477

7578
def convert_dict(self, value: dict[str, Any]) -> Any:
7679
"""Convert dictionary values with dialect-specific handling.
@@ -81,13 +84,8 @@ def convert_dict(self, value: dict[str, Any]) -> Any:
8184
Returns:
8285
Converted value appropriate for the dialect.
8386
"""
84-
85-
# For dialects that cannot handle raw dicts (like ADBC PostgreSQL),
86-
# convert to JSON strings
8787
if self.dialect in {"postgres", "postgresql", "bigquery"}:
8888
return to_json(value)
89-
90-
# For other dialects, pass through unchanged
9189
return value
9290

9391
def supports_native_type(self, type_name: str) -> bool:
@@ -104,11 +102,10 @@ def supports_native_type(self, type_name: str) -> bool:
104102
"postgresql": ["uuid", "json", "interval", "pg_array"],
105103
"duckdb": ["uuid", "json"],
106104
"bigquery": ["json"],
107-
"sqlite": [], # Limited native type support
105+
"sqlite": [],
108106
"mysql": ["json"],
109107
"snowflake": ["json"],
110108
}
111-
112109
return type_name in native_support.get(self.dialect, [])
113110

114111
def get_dialect_specific_converter(self, value: Any, target_type: str) -> Any:
@@ -124,36 +121,33 @@ def get_dialect_specific_converter(self, value: Any, target_type: str) -> Any:
124121
if self.dialect in {"postgres", "postgresql"}:
125122
if target_type in {"uuid", "json", "interval"}:
126123
return self.convert_value(value, target_type)
127-
128124
elif self.dialect == "duckdb":
129125
if target_type in {"uuid", "json"}:
130126
return self.convert_value(value, target_type)
131-
132127
elif self.dialect == "sqlite":
133128
if target_type == "uuid":
134129
return str(value)
135130
if target_type == "json":
136131
return self.convert_value(value, target_type)
137-
138132
elif self.dialect == "bigquery":
139133
if target_type == "uuid":
140134
return str(self.convert_value(value, target_type))
141135
if target_type == "json":
142136
return self.convert_value(value, target_type)
143-
144137
return self.convert_value(value, target_type) if hasattr(self, "convert_value") else value
145138

146139

147-
def get_adbc_type_converter(dialect: str) -> ADBCTypeConverter:
140+
def get_adbc_type_converter(dialect: str, cache_size: int = 5000) -> ADBCTypeConverter:
148141
"""Factory function to create dialect-specific ADBC type converter.
149142
150143
Args:
151144
dialect: Database dialect name.
145+
cache_size: Maximum number of string values to cache (default: 5000)
152146
153147
Returns:
154148
Configured ADBCTypeConverter instance.
155149
"""
156-
return ADBCTypeConverter(dialect)
150+
return ADBCTypeConverter(dialect, cache_size)
157151

158152

159-
__all__ = ("ADBCTypeConverter", "get_adbc_type_converter")
153+
__all__ = ("ADBC_SPECIAL_CHARS", "ADBCTypeConverter", "get_adbc_type_converter")

sqlspec/adapters/asyncpg/driver.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,60 @@
3131
logger = get_logger("adapters.asyncpg")
3232

3333

34+
def _convert_datetime_param(value: Any) -> Any:
35+
"""Convert datetime parameter, handling ISO strings.
36+
37+
Args:
38+
value: datetime object or ISO format string
39+
40+
Returns:
41+
datetime object for asyncpg
42+
"""
43+
if isinstance(value, str):
44+
return datetime.datetime.fromisoformat(value)
45+
return value
46+
47+
48+
def _convert_date_param(value: Any) -> Any:
49+
"""Convert date parameter, handling ISO strings.
50+
51+
Args:
52+
value: date object or ISO format string
53+
54+
Returns:
55+
date object for asyncpg
56+
"""
57+
if isinstance(value, str):
58+
return datetime.date.fromisoformat(value)
59+
return value
60+
61+
62+
def _convert_time_param(value: Any) -> Any:
63+
"""Convert time parameter, handling ISO strings.
64+
65+
Args:
66+
value: time object or ISO format string
67+
68+
Returns:
69+
time object for asyncpg
70+
"""
71+
if isinstance(value, str):
72+
return datetime.time.fromisoformat(value)
73+
return value
74+
75+
3476
asyncpg_statement_config = StatementConfig(
3577
dialect="postgres",
3678
parameter_config=ParameterStyleConfig(
3779
default_parameter_style=ParameterStyle.NUMERIC,
3880
supported_parameter_styles={ParameterStyle.NUMERIC, ParameterStyle.POSITIONAL_PYFORMAT},
3981
default_execution_parameter_style=ParameterStyle.NUMERIC,
4082
supported_execution_parameter_styles={ParameterStyle.NUMERIC},
41-
type_coercion_map={datetime.datetime: lambda x: x, datetime.date: lambda x: x, datetime.time: lambda x: x},
83+
type_coercion_map={
84+
datetime.datetime: _convert_datetime_param,
85+
datetime.date: _convert_date_param,
86+
datetime.time: _convert_time_param,
87+
},
4288
has_native_list_expansion=True,
4389
needs_static_script_compilation=False,
4490
preserve_parameter_format=True,

sqlspec/adapters/bigquery/type_converter.py

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
for the native BigQuery driver.
55
"""
66

7+
from functools import lru_cache
78
from typing import Any, Final, Optional
89
from uuid import UUID
910

@@ -14,7 +15,6 @@
1415
except ImportError:
1516
ScalarQueryParameter = None # type: ignore[assignment,misc]
1617

17-
# Enhanced BigQuery type mapping with UUID support
1818
BQ_TYPE_MAP: Final[dict[str, str]] = {
1919
"str": "STRING",
2020
"int": "INT64",
@@ -23,23 +23,61 @@
2323
"datetime": "DATETIME",
2424
"date": "DATE",
2525
"time": "TIME",
26-
"UUID": "STRING", # UUID as STRING in BigQuery
26+
"UUID": "STRING",
2727
"uuid": "STRING",
2828
"Decimal": "NUMERIC",
2929
"bytes": "BYTES",
3030
"list": "ARRAY",
3131
"dict": "STRUCT",
3232
}
3333

34+
BIGQUERY_SPECIAL_CHARS: Final[frozenset[str]] = frozenset({"{", "[", "-", ":", "T", "."})
35+
3436

3537
class BigQueryTypeConverter(BaseTypeConverter):
3638
"""BigQuery-specific type conversion with UUID support.
3739
3840
Extends the base TypeDetector with BigQuery-specific functionality
3941
including UUID parameter handling for the native BigQuery driver.
42+
Includes per-instance LRU cache for improved performance.
4043
"""
4144

42-
__slots__ = ()
45+
__slots__ = ("_convert_cache",)
46+
47+
def __init__(self, cache_size: int = 5000) -> None:
48+
"""Initialize converter with per-instance conversion cache.
49+
50+
Args:
51+
cache_size: Maximum number of string values to cache (default: 5000)
52+
"""
53+
super().__init__()
54+
55+
@lru_cache(maxsize=cache_size)
56+
def _cached_convert(value: str) -> Any:
57+
if not value or not any(c in value for c in BIGQUERY_SPECIAL_CHARS):
58+
return value
59+
detected_type = self.detect_type(value)
60+
if detected_type:
61+
try:
62+
return self.convert_value(value, detected_type)
63+
except Exception:
64+
return value
65+
return value
66+
67+
self._convert_cache = _cached_convert
68+
69+
def convert_if_detected(self, value: Any) -> Any:
70+
"""Convert string if special type detected (cached).
71+
72+
Args:
73+
value: Value to potentially convert
74+
75+
Returns:
76+
Converted value or original value
77+
"""
78+
if not isinstance(value, str):
79+
return value
80+
return self._convert_cache(value)
4381

4482
def create_parameter(self, name: str, value: Any) -> Optional[Any]:
4583
"""Create BigQuery parameter with proper type mapping.
@@ -63,7 +101,6 @@ def create_parameter(self, name: str, value: Any) -> Optional[Any]:
63101
uuid_obj = convert_uuid(value)
64102
return ScalarQueryParameter(name, "STRING", str(uuid_obj))
65103

66-
# Handle other types
67104
param_type = BQ_TYPE_MAP.get(type(value).__name__, "STRING")
68105
return ScalarQueryParameter(name, param_type, value)
69106

@@ -78,16 +115,8 @@ def convert_bigquery_value(self, value: Any, column_type: str) -> Any:
78115
Converted value appropriate for the column type.
79116
"""
80117
if column_type == "STRING" and isinstance(value, str):
81-
# Try to detect if this is a special type
82-
detected_type = self.detect_type(value)
83-
if detected_type:
84-
try:
85-
return self.convert_value(value, detected_type)
86-
except Exception:
87-
# If conversion fails, return original value
88-
return value
89-
118+
return self.convert_if_detected(value)
90119
return value
91120

92121

93-
__all__ = ("BQ_TYPE_MAP", "BigQueryTypeConverter")
122+
__all__ = ("BIGQUERY_SPECIAL_CHARS", "BQ_TYPE_MAP", "BigQueryTypeConverter")

0 commit comments

Comments
 (0)