Skip to content

Commit cdc295e

Browse files
authored
feat(oracle): implicitly named columns create lower-case dictionary keys (#139)
Introduce a feature to normalize implicit Oracle uppercase column names to lowercase, enhancing compatibility with schema libraries that expect snake_case fields. This feature is enabled by default and preserves quoted case-sensitive aliases. Tests ensure correct functionality for both enabled and disabled states.
1 parent f0c5cb7 commit cdc295e

File tree

12 files changed

+433
-206
lines changed

12 files changed

+433
-206
lines changed

docs/guides/adapters/oracle.md

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ The Oracle session stores (`OracleAsyncStore` and `OracleSyncStore`) support opt
5454

5555
```python
5656
from sqlspec.adapters.oracledb import OracleAsyncConfig
57-
from sqlspec.adapters.oracledb.litestar.store import OracleAsyncStore
57+
from sqlspec.adapters.oracledb.litestar import OracleAsyncStore
5858

5959
config = OracleAsyncConfig(pool_config={"dsn": "oracle://..."})
6060

@@ -102,19 +102,39 @@ If `use_in_memory=True` but In-Memory is not available/licensed, table creation
102102

103103
**Recommendation:** Use `use_in_memory=False` (default) unless you have confirmed licensing and configuration.
104104

105-
### Performance Characteristics
105+
## Column Name Normalization
106106

107-
**In-Memory Session Store Performance:**
107+
Oracle returns unquoted identifiers in uppercase (for example `ID`, `PRODUCT_NAME`). When those rows feed into schema libraries that expect snake_case fields, the uppercase keys can trigger validation errors. SQLSpec resolves this automatically through the `enable_lowercase_column_names` driver feature, which is **enabled by default**.
108108

109-
- Read operations: 2-5x faster for single lookups
110-
- Scan operations: 10-50x faster for bulk operations
111-
- Write operations: Negligible impact (asynchronous population)
112-
- Memory usage: ~2-3x table size (dual format storage)
109+
```python
110+
from sqlspec.adapters.oracledb import OracleAsyncConfig
113111

114-
**Memory Sizing Example:**
112+
config = OracleAsyncConfig(
113+
pool_config={"dsn": "oracle://..."},
114+
driver_features={"enable_lowercase_column_names": True},
115+
)
116+
```
115117

116-
- 1M sessions × 5KB avg = ~5GB table size
117-
- In-Memory requirement: ~10-15GB (with compression)
118+
### How normalization works
119+
120+
- Identifiers matching Oracle's implicit uppercase pattern (`^(?!\d)(?:[A-Z0-9_]+)$`) are lowercased.
121+
- Quoted or user-defined aliases (mixed case, symbols, or names beginning with digits) retain their original casing.
122+
- Disabling the feature restores Oracle's native uppercase behaviour:
123+
124+
```python
125+
config = OracleAsyncConfig(
126+
pool_config={"dsn": "oracle://..."},
127+
driver_features={"enable_lowercase_column_names": False},
128+
)
129+
```
130+
131+
### When to opt out
132+
133+
- You select columns whose names would collide once lowercased (for example an unquoted `ID` alongside a quoted `"id"`).
134+
- You intentionally alias everything in uppercase and want to preserve that style.
135+
- You prefer to manage casing entirely in SQL using quoted identifiers.
136+
137+
In those scenarios set `driver_features={"enable_lowercase_column_names": False}`. Otherwise, keep the default for seamless msgspec/pydantic hydration without extra SQL aliases.
118138

119139
## NumPy Vector Support (Oracle 23ai+)
120140

@@ -283,19 +303,22 @@ await session.execute(
283303
### Error Handling
284304

285305
**Unsupported dtype:**
306+
286307
```python
287308
vector = np.array([1.0, 2.0], dtype=np.float16) # Not supported
288309
# Raises: TypeError: Unsupported NumPy dtype for Oracle VECTOR: float16
289310
```
290311

291312
**Dimension mismatch:**
313+
292314
```python
293315
vector = np.random.rand(512).astype(np.float32)
294316
# Trying to insert into VECTOR(768, FLOAT32) column
295317
# Raises: ORA-51813: Vector dimension count must match
296318
```
297319

298320
**NumPy not installed:**
321+
299322
```python
300323
# With enable_numpy_vectors=True but NumPy not installed
301324
# Falls back to array.array (Python stdlib)

sqlspec/adapters/oracledb/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,13 @@ class OracleDriverFeatures(TypedDict):
9393
Defaults to True when NumPy is installed.
9494
Provides automatic bidirectional conversion between NumPy ndarrays and Oracle VECTOR columns.
9595
Supports float32, float64, int8, and uint8 dtypes.
96+
enable_lowercase_column_names: Normalize implicit Oracle uppercase column names to lowercase.
97+
Targets unquoted Oracle identifiers that default to uppercase while preserving quoted case-sensitive aliases.
98+
Defaults to True for compatibility with schema libraries expecting snake_case fields.
9699
"""
97100

98101
enable_numpy_vectors: NotRequired[bool]
102+
enable_lowercase_column_names: NotRequired[bool]
99103

100104

101105
class OracleSyncConfig(SyncDatabaseConfig[OracleSyncConnection, "OracleSyncConnectionPool", OracleSyncDriver]):
@@ -140,6 +144,8 @@ def __init__(
140144
processed_driver_features: dict[str, Any] = dict(driver_features) if driver_features else {}
141145
if "enable_numpy_vectors" not in processed_driver_features:
142146
processed_driver_features["enable_numpy_vectors"] = NUMPY_INSTALLED
147+
if "enable_lowercase_column_names" not in processed_driver_features:
148+
processed_driver_features["enable_lowercase_column_names"] = True
143149

144150
super().__init__(
145151
pool_config=processed_pool_config,
@@ -297,6 +303,8 @@ def __init__(
297303
processed_driver_features: dict[str, Any] = dict(driver_features) if driver_features else {}
298304
if "enable_numpy_vectors" not in processed_driver_features:
299305
processed_driver_features["enable_numpy_vectors"] = NUMPY_INSTALLED
306+
if "enable_lowercase_column_names" not in processed_driver_features:
307+
processed_driver_features["enable_lowercase_column_names"] = True
300308

301309
super().__init__(
302310
pool_config=processed_pool_config,

sqlspec/adapters/oracledb/data_dictionary.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,11 @@ def _get_columns_sql(self, table: str, schema: "str | None" = None) -> str:
120120
"""
121121
_ = schema
122122
return f"""
123-
SELECT column_name, data_type, data_length, nullable
123+
SELECT
124+
column_name AS "column_name",
125+
data_type AS "data_type",
126+
data_length AS "data_length",
127+
nullable AS "nullable"
124128
FROM user_tab_columns
125129
WHERE table_name = '{table.upper()}'
126130
ORDER BY column_id
@@ -135,7 +139,7 @@ def _get_oracle_version(self, driver: "OracleAsyncDriver | OracleSyncDriver") ->
135139
Returns:
136140
Oracle version information or None if detection fails
137141
"""
138-
banner = driver.select_value("SELECT banner FROM v$version WHERE banner LIKE 'Oracle%'")
142+
banner = driver.select_value("SELECT banner AS \"banner\" FROM v$version WHERE banner LIKE 'Oracle%'")
139143

140144
# Parse version from banner like "Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production"
141145
# or "Oracle Database 19c Standard Edition 2 Release 19.0.0.0.0 - Production"
@@ -170,7 +174,7 @@ def _get_oracle_compatible(self, driver: "OracleAsyncDriver | OracleSyncDriver")
170174
Compatible parameter value or None if detection fails
171175
"""
172176
try:
173-
compatible = driver.select_value("SELECT value FROM v$parameter WHERE name = 'compatible'")
177+
compatible = driver.select_value("SELECT value AS \"value\" FROM v$parameter WHERE name = 'compatible'")
174178
logger.debug("Detected Oracle compatible parameter: %s", compatible)
175179
return str(compatible)
176180
except Exception:
@@ -216,7 +220,7 @@ def _is_oracle_autonomous(self, driver: "OracleSyncDriver") -> bool:
216220
Returns:
217221
True if this is an Autonomous Database, False otherwise
218222
"""
219-
result = driver.select_value_or_none("SELECT COUNT(*) as cnt FROM v$pdbs WHERE cloud_identity IS NOT NULL")
223+
result = driver.select_value_or_none('SELECT COUNT(1) AS "cnt" FROM v$pdbs WHERE cloud_identity IS NOT NULL')
220224
return bool(result and int(result) > 0)
221225

222226
def get_version(self, driver: SyncDriverAdapterBase) -> "OracleVersionInfo | None":
@@ -304,10 +308,10 @@ def get_columns(
304308
305309
Returns:
306310
List of column metadata dictionaries with keys:
307-
- COLUMN_NAME: Name of the column (UPPERCASE in Oracle)
308-
- DATA_TYPE: Oracle data type
309-
- DATA_LENGTH: Maximum length (for character types)
310-
- NULLABLE: 'Y' or 'N'
311+
- column_name: Name of the column
312+
- data_type: Oracle data type
313+
- data_length: Maximum length (for character types)
314+
- nullable: 'Y' or 'N'
311315
"""
312316

313317
oracle_driver = cast("OracleSyncDriver", driver)
@@ -345,7 +349,7 @@ async def get_version(self, driver: AsyncDriverAdapterBase) -> "OracleVersionInf
345349
Oracle version information or None if detection fails
346350
"""
347351
banner = await cast("OracleAsyncDriver", driver).select_value(
348-
"SELECT banner FROM v$version WHERE banner LIKE 'Oracle%'"
352+
"SELECT banner AS \"banner\" FROM v$version WHERE banner LIKE 'Oracle%'"
349353
)
350354

351355
version_match = ORACLE_VERSION_PATTERN.search(str(banner))
@@ -385,7 +389,9 @@ async def _get_oracle_compatible_async(self, driver: "OracleAsyncDriver") -> "st
385389
Compatible parameter value or None if detection fails
386390
"""
387391
try:
388-
compatible = await driver.select_value("SELECT value FROM v$parameter WHERE name = 'compatible'")
392+
compatible = await driver.select_value(
393+
"SELECT value AS \"value\" FROM v$parameter WHERE name = 'compatible'"
394+
)
389395
logger.debug("Detected Oracle compatible parameter: %s", compatible)
390396
return str(compatible)
391397
except Exception:
@@ -403,7 +409,7 @@ async def _is_oracle_autonomous_async(self, driver: "OracleAsyncDriver") -> bool
403409
"""
404410
# Check for cloud_identity in v$pdbs (most reliable for Autonomous)
405411
with suppress(Exception):
406-
result = await driver.execute("SELECT COUNT(*) as cnt FROM v$pdbs WHERE cloud_identity IS NOT NULL")
412+
result = await driver.execute('SELECT COUNT(1) AS "cnt" FROM v$pdbs WHERE cloud_identity IS NOT NULL')
407413
if result.data:
408414
count = result.data[0]["cnt"] if isinstance(result.data[0], dict) else result.data[0][0]
409415
if int(count) > 0:
@@ -475,10 +481,10 @@ async def get_columns(
475481
476482
Returns:
477483
List of column metadata dictionaries with keys:
478-
- COLUMN_NAME: Name of the column (UPPERCASE in Oracle)
479-
- DATA_TYPE: Oracle data type
480-
- DATA_LENGTH: Maximum length (for character types)
481-
- NULLABLE: 'Y' or 'N'
484+
- column_name: Name of the column
485+
- data_type: Oracle data type
486+
- data_length: Maximum length (for character types)
487+
- nullable: 'Y' or 'N'
482488
"""
483489

484490
oracle_driver = cast("OracleAsyncDriver", driver)

sqlspec/adapters/oracledb/driver.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
import contextlib
44
import logging
5-
from typing import TYPE_CHECKING, Any
5+
import re
6+
from typing import TYPE_CHECKING, Any, Final
67

78
import oracledb
89
from oracledb import AsyncCursor, Cursor
@@ -48,6 +49,22 @@
4849

4950
_type_converter = OracleTypeConverter()
5051

52+
IMPLICIT_UPPER_COLUMN_PATTERN: Final[re.Pattern[str]] = re.compile(r"^(?!\d)(?:[A-Z0-9_]+)$")
53+
54+
55+
def _normalize_column_names(column_names: "list[str]", driver_features: "dict[str, Any]") -> "list[str]":
56+
should_lowercase = driver_features.get("enable_lowercase_column_names", False)
57+
if not should_lowercase:
58+
return column_names
59+
normalized: list[str] = []
60+
for name in column_names:
61+
if name and IMPLICIT_UPPER_COLUMN_PATTERN.fullmatch(name):
62+
normalized.append(name.lower())
63+
else:
64+
normalized.append(name)
65+
return normalized
66+
67+
5168
__all__ = (
5269
"OracleAsyncDriver",
5370
"OracleAsyncExceptionHandler",
@@ -468,6 +485,7 @@ def _execute_statement(self, cursor: Any, statement: "SQL") -> "ExecutionResult"
468485
if statement.returns_rows():
469486
fetched_data = cursor.fetchall()
470487
column_names = [col[0] for col in cursor.description or []]
488+
column_names = _normalize_column_names(column_names, self.driver_features)
471489

472490
# Oracle returns tuples - convert to consistent dict format
473491
data = [dict(zip(column_names, row, strict=False)) for row in fetched_data]
@@ -660,6 +678,7 @@ async def _execute_statement(self, cursor: Any, statement: "SQL") -> "ExecutionR
660678
if statement.returns_rows():
661679
fetched_data = await cursor.fetchall()
662680
column_names = [col[0] for col in cursor.description or []]
681+
column_names = _normalize_column_names(column_names, self.driver_features)
663682

664683
# Oracle returns tuples - convert to consistent dict format
665684
data = [dict(zip(column_names, row, strict=False)) for row in fetched_data]

sqlspec/adapters/oracledb/migrations.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _migrate_schema_if_needed(self, driver: "SyncDriverAdapterBase") -> None:
159159
"""
160160
try:
161161
columns_data = driver.data_dictionary.get_columns(driver, self.version_table)
162-
existing_columns = {row["COLUMN_NAME"] for row in columns_data}
162+
existing_columns = {str(row["column_name"]).upper() for row in columns_data}
163163
missing_columns = self._detect_missing_columns(existing_columns)
164164

165165
if not missing_columns:
@@ -353,7 +353,7 @@ async def _migrate_schema_if_needed(self, driver: "AsyncDriverAdapterBase") -> N
353353
"""
354354
try:
355355
columns_data = await driver.data_dictionary.get_columns(driver, self.version_table)
356-
existing_columns = {row["COLUMN_NAME"] for row in columns_data}
356+
existing_columns = {str(row["column_name"]).upper() for row in columns_data}
357357
missing_columns = self._detect_missing_columns(existing_columns)
358358

359359
if not missing_columns:

0 commit comments

Comments
 (0)