Skip to content

Commit e5fbb15

Browse files
authored
Merge pull request #290 from chdb-io/patchset-2.2.0b2
Refactor query execution to use connection object
2 parents 01afb91 + 9bbfd3a commit e5fbb15

28 files changed

+806
-415
lines changed

.github/workflows/pr_ci.yaml

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -9,33 +9,23 @@ jobs:
99
env:
1010
PYTHON_VERSIONS: "3.11"
1111

12-
runs-on: self-hosted
12+
runs-on: gh-64c
1313
steps:
14-
- name: Check for chdb directory
15-
run: |
16-
if [ ! -d "/home/ubuntu/pr_runner/chdb" ]; then
17-
echo "chdb directory does not exist. Checkout the repository."
18-
mkdir -p /home/ubuntu/pr_runner/
19-
git clone https://github.com/chdb-io/chdb.git /home/ubuntu/pr_runner/chdb
20-
fi
14+
- name: Clone chDB repository
15+
uses: actions/checkout@v2
16+
with:
17+
repository: "chdb-io/chdb"
18+
ref: "refs/pull/${{ github.event.pull_request.number }}/merge"
19+
token: ${{ secrets.GITHUB_TOKEN }}
2120

22-
- name: Cleanup and update chdb directory
23-
run: |
24-
cd /home/ubuntu/pr_runner/chdb
25-
git fetch origin || true
26-
git fetch origin +refs/heads/*:refs/remotes/origin/* +refs/pull/${{ github.event.pull_request.number }}/merge:refs/remotes/pull/${{ github.event.pull_request.number }}/merge || true
27-
git reset --hard origin/${{ github.head_ref }} || true
28-
git clean -fdx || true
29-
git checkout --progress --force refs/remotes/pull/${{ github.event.pull_request.number }}/merge || true
30-
git status -v || true
31-
continue-on-error: true
21+
- name: Setup Python
22+
uses: actions/setup-python@v2
23+
with:
24+
python-version: 3.11
3225

33-
- name: Code style check
34-
run: |
35-
export PYENV_ROOT="$HOME/.pyenv"
36-
[[ -d $PYENV_ROOT/bin ]] && export PATH="$PYENV_ROOT/bin:$PATH"
37-
eval "$(pyenv init -)"
38-
pyenv local 3.11
39-
python3 -m pip install flake8
40-
cd chdb && python3 -m flake8
41-
working-directory: /home/ubuntu/pr_runner/chdb
26+
- name: Install flake8
27+
run: python -m pip install flake8
28+
29+
- name: Run flake8 on chdb directory
30+
run: cd chdb && flake8 .
31+

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
*.logrt
1313

1414
/python_pkg/
15+
minitest/
1516
/tmps
1617
/bak
1718
*state_tmp_*

chdb/__init__.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import sys
22
import os
3+
import threading
34

45

56
class ChdbError(Exception):
@@ -29,7 +30,9 @@ class ChdbError(Exception):
2930
from . import _chdb # noqa
3031

3132
os.chdir(cwd)
32-
engine_version = str(_chdb.query("SELECT version();", "CSV").bytes())[3:-4]
33+
conn = _chdb.connect()
34+
engine_version = str(conn.query("SELECT version();", "CSV").bytes())[3:-4]
35+
conn.close()
3336
else:
3437
raise NotImplementedError("Python 3.6 or lower version is not supported")
3538

@@ -64,18 +67,44 @@ def to_df(r):
6467
return t.to_pandas(use_threads=True)
6568

6669

70+
# global connection lock, for multi-threading use of legacy chdb.query()
71+
g_conn_lock = threading.Lock()
72+
73+
6774
# wrap _chdb functions
6875
def query(sql, output_format="CSV", path="", udf_path=""):
6976
global g_udf_path
7077
if udf_path != "":
7178
g_udf_path = udf_path
79+
conn_str = ""
80+
if path == "":
81+
conn_str = ":memory:"
82+
else:
83+
conn_str = f"{path}"
84+
if g_udf_path != "":
85+
if "?" in conn_str:
86+
conn_str = f"{conn_str}&udf_path={g_udf_path}"
87+
else:
88+
conn_str = f"{conn_str}?udf_path={g_udf_path}"
89+
if output_format == "Debug":
90+
output_format = "CSV"
91+
if "?" in conn_str:
92+
conn_str = f"{conn_str}&verbose&log-level=test"
93+
else:
94+
conn_str = f"{conn_str}?verbose&log-level=test"
95+
7296
lower_output_format = output_format.lower()
7397
result_func = _process_result_format_funs.get(lower_output_format, lambda x: x)
7498
if lower_output_format in _arrow_format:
7599
output_format = "Arrow"
76-
res = _chdb.query(sql, output_format, path=path, udf_path=g_udf_path)
77-
if res.has_error():
78-
raise ChdbError(res.error_message())
100+
101+
with g_conn_lock:
102+
conn = _chdb.connect(conn_str)
103+
res = conn.query(sql, output_format)
104+
if res.has_error():
105+
conn.close()
106+
raise ChdbError(res.error_message())
107+
conn.close()
79108
return result_func(res)
80109

81110

chdb/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0
9393
-DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \
9494
-DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \
9595
-DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \
96-
-DENABLE_SIMDJSON=1 \
96+
-DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \
9797
${CPU_FEATURES} \
9898
${CMAKE_TOOLCHAIN_FILE} \
9999
-DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \

chdb/session/state.py

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,100 @@
11
import tempfile
22
import shutil
3+
import warnings
34

4-
from chdb import query
5+
import chdb
6+
from ..state import sqlitelike as chdb_stateful
7+
8+
9+
g_session = None
10+
g_session_path = None
511

612

713
class Session:
814
"""
9-
Session will keep the state of query. All DDL and DML state will be kept in a dir.
10-
Dir path could be passed in as an argument. If not, a temporary dir will be created.
15+
Session will keep the state of query.
16+
If path is None, it will create a temporary directory and use it as the database path
17+
and the temporary directory will be removed when the session is closed.
18+
You can also pass in a path to create a database at that path where will keep your data.
19+
20+
You can also use a connection string to pass in the path and other parameters.
21+
Examples:
22+
- ":memory:" (for in-memory database)
23+
- "test.db" (for relative path)
24+
- "file:test.db" (same as above)
25+
- "/path/to/test.db" (for absolute path)
26+
- "file:/path/to/test.db" (same as above)
27+
- "file:test.db?param1=value1&param2=value2" (for relative path with query params)
28+
- "file::memory:?verbose&log-level=test" (for in-memory database with query params)
29+
- "///path/to/test.db?param1=value1&param2=value2" (for absolute path)
1130
12-
If path is not specified, the temporary dir will be deleted when the Session object is deleted.
13-
Otherwise path will be kept.
31+
Connection string args handling:
32+
Connection string can contain query params like "file:test.db?param1=value1&param2=value2"
33+
"param1=value1" will be passed to ClickHouse engine as start up args.
1434
15-
Note: The default database is "_local" and the default engine is "Memory" which means all data
16-
will be stored in memory. If you want to store data in disk, you should create another database.
35+
For more details, see `clickhouse local --help --verbose`
36+
Some special args handling:
37+
- "mode=ro" would be "--readonly=1" for clickhouse (read-only mode)
38+
39+
Important:
40+
- There can be only one session at a time. If you want to create a new session, you need to close the existing one.
41+
- Creating a new session will close the existing one.
1742
"""
1843

1944
def __init__(self, path=None):
45+
global g_session, g_session_path
46+
if g_session is not None:
47+
warnings.warn(
48+
"There is already an active session. Creating a new session will close the existing one. "
49+
"It is recommended to close the existing session before creating a new one. "
50+
f"Closing the existing session {g_session_path}"
51+
)
52+
g_session.close()
53+
g_session_path = None
2054
if path is None:
2155
self._cleanup = True
2256
self._path = tempfile.mkdtemp()
2357
else:
2458
self._cleanup = False
2559
self._path = path
60+
if chdb.g_udf_path != "":
61+
self._udf_path = chdb.g_udf_path
62+
# add udf_path to conn_str here.
63+
# - the `user_scripts_path` will be the value of `udf_path`
64+
# - the `user_defined_executable_functions_config` will be `user_scripts_path/*.xml`
65+
# Both of them will be added to the conn_str in the Connection class
66+
if "?" in self._path:
67+
self._conn_str = f"{self._path}&udf_path={self._udf_path}"
68+
else:
69+
self._conn_str = f"{self._path}?udf_path={self._udf_path}"
70+
else:
71+
self._udf_path = ""
72+
self._conn_str = f"{self._path}"
73+
self._conn = chdb_stateful.Connection(self._conn_str)
74+
g_session = self
75+
g_session_path = self._path
2676

2777
def __del__(self):
28-
if self._cleanup:
29-
self.cleanup()
78+
self.close()
3079

3180
def __enter__(self):
3281
return self
3382

3483
def __exit__(self, exc_type, exc_value, traceback):
35-
self.cleanup()
84+
self.close()
85+
86+
def close(self):
87+
if self._cleanup:
88+
self.cleanup()
89+
if self._conn is not None:
90+
self._conn.close()
91+
self._conn = None
3692

3793
def cleanup(self):
3894
try:
95+
if self._conn is not None:
96+
self._conn.close()
97+
self._conn = None
3998
shutil.rmtree(self._path)
4099
except: # noqa
41100
pass
@@ -44,7 +103,14 @@ def query(self, sql, fmt="CSV", udf_path=""):
44103
"""
45104
Execute a query.
46105
"""
47-
return query(sql, fmt, path=self._path, udf_path=udf_path)
106+
if fmt == "Debug":
107+
warnings.warn(
108+
"""Debug format is not supported in Session.query
109+
Please try use parameters in connection string instead:
110+
Eg: conn = connect(f"db_path?verbose&log-level=test")"""
111+
)
112+
fmt = "CSV"
113+
return self._conn.query(sql, fmt)
48114

49115
# alias sql = query
50116
sql = query

chdb/state/sqlitelike.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,14 @@ def connect(connection_string: str = ":memory:") -> Connection:
109109
110110
Args:
111111
connection_string (str, optional): Connection string. Defaults to ":memory:".
112-
Aslo support file path like:
112+
Also support file path like:
113113
- ":memory:" (for in-memory database)
114114
- "test.db" (for relative path)
115115
- "file:test.db" (same as above)
116116
- "/path/to/test.db" (for absolute path)
117117
- "file:/path/to/test.db" (same as above)
118118
- "file:test.db?param1=value1&param2=value2" (for relative path with query params)
119+
- "file::memory:?verbose&log-level=test" (for in-memory database with query params)
119120
- "///path/to/test.db?param1=value1&param2=value2" (for absolute path)
120121
121122
Connection string args handling:

chdb/test_smoke.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ python3 -c \
2222
"import chdb; res = chdb._chdb.query('select version()', 'CSV'); print(res)"
2323

2424
python3 -c \
25-
"import chdb; res = chdb.query('select version()', 'CSV'); print(res.bytes())"
25+
"import chdb; res = chdb.query('select version()', 'Debug'); print(res.bytes())"
2626

2727
# test json function
2828
python3 -c \

0 commit comments

Comments
 (0)