Skip to content

Commit 04c9f62

Browse files
committed
Handling data type in python
1 parent a380702 commit 04c9f62

File tree

1 file changed

+118
-10
lines changed

1 file changed

+118
-10
lines changed

chdb/state/sqlitelike.py

Lines changed: 118 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,17 +77,103 @@ def __init__(self, connection):
7777
def execute(self, query: str) -> None:
7878
self._cursor.execute(query)
7979
result_mv = self._cursor.get_memview()
80-
# print("get_result", result_mv)
8180
if self._cursor.has_error():
8281
raise Exception(self._cursor.error_message())
8382
if self._cursor.data_size() == 0:
8483
self._current_table = None
8584
self._current_row = 0
85+
self._column_names = []
86+
self._column_types = []
8687
return
87-
arrow_data = result_mv.tobytes()
88-
reader = pa.ipc.open_stream(io.BytesIO(arrow_data))
89-
self._current_table = reader.read_all()
90-
self._current_row = 0
88+
89+
# Parse JSON data
90+
json_data = result_mv.tobytes().decode("utf-8")
91+
import json
92+
93+
try:
94+
# First line contains column names
95+
# Second line contains column types
96+
# Following lines contain data
97+
lines = json_data.strip().split("\n")
98+
if len(lines) < 2:
99+
self._current_table = None
100+
self._current_row = 0
101+
self._column_names = []
102+
self._column_types = []
103+
return
104+
105+
self._column_names = json.loads(lines[0])
106+
self._column_types = json.loads(lines[1])
107+
108+
# Convert data rows
109+
rows = []
110+
for line in lines[2:]:
111+
if not line.strip():
112+
continue
113+
row_data = json.loads(line)
114+
converted_row = []
115+
for val, type_info in zip(row_data, self._column_types):
116+
# Handle NULL values first
117+
if val is None:
118+
converted_row.append(None)
119+
continue
120+
121+
# Basic type conversion
122+
try:
123+
if type_info.startswith("Int") or type_info.startswith("UInt"):
124+
converted_row.append(int(val))
125+
elif type_info.startswith("Float"):
126+
converted_row.append(float(val))
127+
elif type_info == "Bool":
128+
converted_row.append(bool(val))
129+
elif type_info == "String" or type_info == "FixedString":
130+
converted_row.append(str(val))
131+
elif type_info.startswith("DateTime"):
132+
from datetime import datetime
133+
134+
# Check if the value is numeric (timestamp)
135+
val_str = str(val)
136+
if val_str.replace(".", "").isdigit():
137+
converted_row.append(datetime.fromtimestamp(float(val)))
138+
else:
139+
# Handle datetime string formats
140+
if "." in val_str: # Has microseconds
141+
converted_row.append(
142+
datetime.strptime(
143+
val_str, "%Y-%m-%d %H:%M:%S.%f"
144+
)
145+
)
146+
else: # No microseconds
147+
converted_row.append(
148+
datetime.strptime(val_str, "%Y-%m-%d %H:%M:%S")
149+
)
150+
elif type_info.startswith("Date"):
151+
from datetime import date, datetime
152+
153+
# Check if the value is numeric (days since epoch)
154+
val_str = str(val)
155+
if val_str.isdigit():
156+
converted_row.append(
157+
date.fromtimestamp(float(val) * 86400)
158+
)
159+
else:
160+
# Handle date string format
161+
converted_row.append(
162+
datetime.strptime(val_str, "%Y-%m-%d").date()
163+
)
164+
else:
165+
# For unsupported types, keep as string
166+
converted_row.append(str(val))
167+
except (ValueError, TypeError):
168+
# If conversion fails, keep original value as string
169+
converted_row.append(str(val))
170+
rows.append(tuple(converted_row))
171+
172+
self._current_table = rows
173+
self._current_row = 0
174+
175+
except json.JSONDecodeError as e:
176+
raise Exception(f"Failed to parse JSON data: {e}")
91177

92178
def commit(self) -> None:
93179
self._cursor.commit()
@@ -96,12 +182,10 @@ def fetchone(self) -> Optional[tuple]:
96182
if not self._current_table or self._current_row >= len(self._current_table):
97183
return None
98184

99-
row_dict = {
100-
col: self._current_table.column(col)[self._current_row].as_py()
101-
for col in self._current_table.column_names
102-
}
185+
# Now self._current_table is a list of row tuples
186+
row = self._current_table[self._current_row]
103187
self._current_row += 1
104-
return tuple(row_dict.values())
188+
return row
105189

106190
def fetchmany(self, size: int = 1) -> tuple:
107191
if not self._current_table:
@@ -135,6 +219,30 @@ def __next__(self) -> tuple:
135219
raise StopIteration
136220
return row
137221

222+
def column_names(self) -> list:
223+
"""Return a list of column names from the last executed query"""
224+
return self._column_names if hasattr(self, "_column_names") else []
225+
226+
def column_types(self) -> list:
227+
"""Return a list of column types from the last executed query"""
228+
return self._column_types if hasattr(self, "_column_types") else []
229+
230+
@property
231+
def description(self) -> list:
232+
"""
233+
Return a description of the columns as per DB-API 2.0
234+
Returns a list of 7-item tuples, each containing:
235+
(name, type_code, display_size, internal_size, precision, scale, null_ok)
236+
where only name and type_code are provided
237+
"""
238+
if not hasattr(self, "_column_names") or not self._column_names:
239+
return []
240+
241+
return [
242+
(name, type_info, None, None, None, None, None)
243+
for name, type_info in zip(self._column_names, self._column_types)
244+
]
245+
138246

139247
def connect(connection_string: str = ":memory:") -> Connection:
140248
"""

0 commit comments

Comments
 (0)