Skip to content

Commit affe78e

Browse files
authored
Merge pull request #370 from kafka1991/victor_dbapi_placholder
feat(DBAPI): support `?` as placeholder to improve compatibility for `DBAPI`
2 parents 001606e + 6eaffb9 commit affe78e

File tree

6 files changed

+269
-16
lines changed

6 files changed

+269
-16
lines changed

.github/workflows/build_linux_arm64_wheels-gh.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,11 @@ jobs:
158158
sudo rm -f dist/*linux_aarch64.whl
159159
ls -lh dist
160160
shell: bash
161+
- name: Setup core dump collection
162+
run: |
163+
mkdir -p tmp/core
164+
echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern
165+
ulimit -c unlimited
161166
- name: Test wheel on all Python versions
162167
run: |
163168
export PATH="$HOME/.pyenv/bin:$PATH"
@@ -171,6 +176,25 @@ jobs:
171176
pyenv shell --unset
172177
done
173178
continue-on-error: false
179+
- name: Check and upload core files if present
180+
if: always()
181+
run: |
182+
if ls tmp/core/core.* >/dev/null 2>&1; then
183+
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
184+
tar -czvf core-files-linux-aarch64.tar.gz tmp/core/core.*
185+
echo "Core files tar created: core-files-linux-aarch64.tar.gz"
186+
ls -lh core-files-linux-aarch64.tar.gz
187+
else
188+
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
189+
echo "No core files found in tmp/core"
190+
fi
191+
continue-on-error: true
192+
- name: Upload core files if present
193+
if: always() && env.CORE_FILES_FOUND == 'true'
194+
uses: actions/upload-artifact@v4
195+
with:
196+
name: core-files-linux-aarch64
197+
path: core-files-linux-aarch64.tar.gz
174198
- name: Upload wheels to release
175199
if: startsWith(github.ref, 'refs/tags/v')
176200
run: |

.github/workflows/build_linux_x86_wheels.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ jobs:
157157
sudo rm -f dist/*-linux_x86_64.whl
158158
ls -lh dist
159159
shell: bash
160+
- name: Setup core dump collection
161+
run: |
162+
mkdir -p tmp/core
163+
echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern
164+
ulimit -c unlimited
160165
- name: Test wheel on all Python versions
161166
run: |
162167
export PATH="$HOME/.pyenv/bin:$PATH"
@@ -170,6 +175,25 @@ jobs:
170175
pyenv shell --unset
171176
done
172177
continue-on-error: false
178+
- name: Check and upload core files if present
179+
if: always()
180+
run: |
181+
if ls tmp/core/core.* >/dev/null 2>&1; then
182+
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
183+
tar -czvf core-files-linux-x86_64.tar.gz tmp/core/core.*
184+
echo "Core files tar created: core-files-linux-x86_64.tar.gz"
185+
ls -lh core-files-linux-x86_64.tar.gz
186+
else
187+
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
188+
echo "No core files found in tmp/core"
189+
fi
190+
continue-on-error: true
191+
- name: Upload core files artifact
192+
if: always() && env.CORE_FILES_FOUND == 'true'
193+
uses: actions/upload-artifact@v4
194+
with:
195+
name: core-files-linux-x86_64
196+
path: core-files-linux-x86_64.tar.gz
173197
- name: Upload wheels to release
174198
if: startsWith(github.ref, 'refs/tags/v')
175199
run: |

.github/workflows/build_macos_arm64_wheels.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,12 @@ jobs:
157157
eval "$(pyenv init -)"
158158
pyenv shell 3.8
159159
python -m wheel tags --platform-tag=macosx_11_0_arm64 --remove dist/*.whl
160+
- name: Setup core dump
161+
run: |
162+
mkdir -p tmp/core
163+
sudo sysctl kern.corefile=$PWD/tmp/core/core.%P
164+
sudo sysctl kern.coredump=1
165+
ulimit -c unlimited
160166
- name: Test wheel on all Python versions
161167
run: |
162168
export PATH="$HOME/.pyenv/bin:$PATH"
@@ -170,6 +176,25 @@ jobs:
170176
pyenv shell --unset
171177
done
172178
continue-on-error: false
179+
- name: Check and upload core files if present
180+
if: always()
181+
run: |
182+
if ls tmp/core/core.* >/dev/null 2>&1; then
183+
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
184+
tar -czvf core-files-macos-arm64.tar.gz tmp/core/core.*
185+
echo "Core files tar created: core-files-macos-arm64.tar.gz"
186+
ls -lh core-files-macos-arm64.tar.gz
187+
else
188+
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
189+
echo "No core files found in tmp/core"
190+
fi
191+
continue-on-error: true
192+
- name: Upload core files artifact
193+
if: always() && env.CORE_FILES_FOUND == 'true'
194+
uses: actions/upload-artifact@v4
195+
with:
196+
name: core-files-macos-arm64
197+
path: core-files-macos-arm64.tar.gz
173198
- name: Show files
174199
run: ls -lh dist
175200
shell: bash

.github/workflows/build_macos_x86_wheels.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,12 @@ jobs:
157157
eval "$(pyenv init -)"
158158
pyenv shell 3.8
159159
python -m wheel tags --platform-tag=macosx_10_15_x86_64 --remove dist/*.whl
160+
- name: Setup core dump collection
161+
run: |
162+
mkdir -p tmp/core
163+
sudo sysctl kern.corefile=$PWD/tmp/core/core.%P
164+
sudo sysctl kern.coredump=1
165+
ulimit -c unlimited
160166
- name: Test wheel on all Python versions
161167
run: |
162168
export PATH="$HOME/.pyenv/bin:$PATH"
@@ -170,6 +176,25 @@ jobs:
170176
pyenv shell --unset
171177
done
172178
continue-on-error: false
179+
- name: Check and upload core files if present
180+
if: always()
181+
run: |
182+
if ls tmp/core/core.* >/dev/null 2>&1; then
183+
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
184+
tar -czvf core-files-macos-x86_64.tar.gz tmp/core/core.*
185+
echo "Core files tar created: core-files-macos-x86_64.tar.gz"
186+
ls -lh core-files-macos-x86_64.tar.gz
187+
else
188+
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
189+
echo "No core files found in tmp/core"
190+
fi
191+
continue-on-error: true
192+
- name: Upload core files artifact
193+
if: always() && env.CORE_FILES_FOUND == 'true'
194+
uses: actions/upload-artifact@v4
195+
with:
196+
name: core-files-macos-x86_64
197+
path: core-files-macos-x86_64.tar.gz
173198
- name: Show files
174199
run: ls -lh dist
175200
shell: bash

chdb/dbapi/cursors.py

Lines changed: 120 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
# You can use it to load large dataset.
77
RE_INSERT_VALUES = re.compile(
88
r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
9-
+ r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
9+
+ r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))"
1010
+ r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
1111
re.IGNORECASE | re.DOTALL,
1212
)
@@ -99,6 +99,49 @@ def _escape_args(self, args, conn):
9999
# Worst case it will throw a Value error
100100
return conn.escape(args)
101101

102+
def _format_query(self, query, args, conn):
103+
"""Format query with arguments supporting ? and % placeholders."""
104+
if args is None or ('?' not in query and '%' not in query):
105+
return query
106+
107+
escaped_args = self._escape_args(args, conn)
108+
if not isinstance(escaped_args, (tuple, list)):
109+
escaped_args = (escaped_args,)
110+
111+
result = []
112+
arg_index = 0
113+
max_args = len(escaped_args)
114+
i = 0
115+
query_len = len(query)
116+
in_string = False
117+
quote_char = None
118+
119+
while i < query_len:
120+
char = query[i]
121+
if not in_string:
122+
if char in ("'", '"'):
123+
in_string = True
124+
quote_char = char
125+
elif arg_index < max_args:
126+
if char == '?':
127+
result.append(str(escaped_args[arg_index]))
128+
arg_index += 1
129+
i += 1
130+
continue
131+
elif char == '%' and i + 1 < query_len and query[i + 1] == 's':
132+
result.append(str(escaped_args[arg_index]))
133+
arg_index += 1
134+
i += 2
135+
continue
136+
elif char == quote_char and (i == 0 or query[i - 1] != '\\'):
137+
in_string = False
138+
quote_char = None
139+
140+
result.append(char)
141+
i += 1
142+
143+
return ''.join(result)
144+
102145
def mogrify(self, query, args=None):
103146
"""
104147
Returns the exact string that is sent to the database by calling the
@@ -107,11 +150,7 @@ def mogrify(self, query, args=None):
107150
This method follows the extension to the DB API 2.0 followed by Psycopg.
108151
"""
109152
conn = self._get_db()
110-
111-
if args is not None:
112-
query = query % self._escape_args(args, conn)
113-
114-
return query
153+
return self._format_query(query, args, conn)
115154

116155
def execute(self, query, args=None):
117156
"""Execute a query
@@ -124,12 +163,11 @@ def execute(self, query, args=None):
124163
:return: Number of affected rows
125164
:rtype: int
126165
127-
If args is a list or tuple, %s can be used as a placeholder in the query.
166+
If args is a list or tuple, ? can be used as a placeholder in the query.
128167
If args is a dict, %(name)s can be used as a placeholder in the query.
168+
Also supports %s placeholder for backward compatibility.
129169
"""
130-
if args is not None:
131-
query = query % self._escape_args(args, self.connection)
132-
170+
query = self._format_query(query, args, self.connection)
133171
self._cursor.execute(query)
134172

135173
# Get description from column names and types
@@ -183,32 +221,98 @@ def executemany(self, query, args):
183221
self.rowcount = sum(self.execute(query, arg) for arg in args)
184222
return self.rowcount
185223

224+
def _find_placeholder_positions(self, query):
225+
positions = []
226+
i = 0
227+
query_len = len(query)
228+
in_string = False
229+
quote_char = None
230+
231+
while i < query_len:
232+
char = query[i]
233+
if not in_string:
234+
if char in ("'", '"'):
235+
in_string = True
236+
quote_char = char
237+
elif char == '?':
238+
positions.append((i, 1)) # (position, length)
239+
elif char == '%' and i + 1 < query_len and query[i + 1] == 's':
240+
positions.append((i, 2))
241+
i += 1
242+
elif char == quote_char and (i == 0 or query[i - 1] != '\\'):
243+
in_string = False
244+
quote_char = None
245+
i += 1
246+
247+
return positions
248+
186249
def _do_execute_many(
187250
self, prefix, values, postfix, args, max_stmt_length, encoding
188251
):
189252
conn = self._get_db()
190-
escape = self._escape_args
191253
if isinstance(prefix, str):
192254
prefix = prefix.encode(encoding)
193255
if isinstance(postfix, str):
194256
postfix = postfix.encode(encoding)
257+
258+
# Pre-compute placeholder positions
259+
placeholder_positions = self._find_placeholder_positions(values)
260+
195261
sql = prefix
196262
args = iter(args)
197-
v = values % escape(next(args), conn)
263+
264+
if not placeholder_positions:
265+
values_bytes = values.encode(encoding, "surrogateescape") if isinstance(values, str) else values
266+
sql += values_bytes
267+
rows = 0
268+
for _ in args:
269+
if len(sql) + len(values_bytes) + len(postfix) + 2 > max_stmt_length:
270+
rows += self.execute(sql + postfix)
271+
sql = prefix + values_bytes
272+
else:
273+
sql += ",".encode(encoding)
274+
sql += values_bytes
275+
rows += self.execute(sql + postfix)
276+
self.rowcount = rows
277+
return rows
278+
279+
template_parts = []
280+
last_pos = 0
281+
for pos, length in placeholder_positions:
282+
template_parts.append(values[last_pos:pos])
283+
last_pos = pos + length
284+
template_parts.append(values[last_pos:])
285+
286+
def format_values_fast(escaped_arg):
287+
if len(escaped_arg) != len(placeholder_positions):
288+
return values
289+
result = template_parts[0]
290+
for i, val in enumerate(escaped_arg):
291+
result += str(val) + template_parts[i + 1]
292+
return result
293+
294+
def format_values_with_positions(arg):
295+
escaped_arg = self._escape_args(arg, conn)
296+
if not isinstance(escaped_arg, (tuple, list)):
297+
escaped_arg = (escaped_arg,)
298+
return format_values_fast(escaped_arg)
299+
300+
v = format_values_with_positions(next(args))
198301
if isinstance(v, str):
199302
v = v.encode(encoding, "surrogateescape")
200303
sql += v
201304
rows = 0
305+
202306
for arg in args:
203-
v = values % escape(arg, conn)
307+
v = format_values_with_positions(arg)
204308
if isinstance(v, str):
205309
v = v.encode(encoding, "surrogateescape")
206-
if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
310+
if len(sql) + len(v) + len(postfix) + 2 > max_stmt_length: # +2 for comma
207311
rows += self.execute(sql + postfix)
208-
sql = prefix
312+
sql = prefix + v
209313
else:
210314
sql += ",".encode(encoding)
211-
sql += v
315+
sql += v
212316
rows += self.execute(sql + postfix)
213317
self.rowcount = rows
214318
return rows

0 commit comments

Comments
 (0)