Skip to content

Commit 7f1bf82

Browse files
authored
Merge pull request #2867 from mabel-dev/clickbench-performance-regression-investigation-1
Clickbench performance regression investigation 1
2 parents cca1cc9 + b3eb472 commit 7f1bf82

File tree

11 files changed

+277
-101
lines changed

11 files changed

+277
-101
lines changed

opteryx/__init__.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@
2525
from pathlib import Path
2626

2727
from decimal import getcontext
28-
from typing import Optional, Union, Dict, Any, List
28+
from typing import Optional, Union, Dict, Any, List, TYPE_CHECKING
2929

30-
import pyarrow
30+
if TYPE_CHECKING: # pragma: no cover - only for type checkers
31+
import pyarrow
3132

3233
# Set Decimal precision to 28 globally
3334
getcontext().prec = 28
@@ -51,16 +52,19 @@ def is_mac() -> bool: # pragma: no cover
5152

5253

5354
# python-dotenv allows us to create an environment file to store secrets.
54-
try:
55-
import dotenv # type:ignore
55+
# Only try to import dotenv if a .env file exists to avoid paying the
56+
# import cost when no environment file is present.
57+
_env_path = Path.cwd() / ".env"
58+
if _env_path.exists():
59+
try:
60+
import dotenv # type:ignore
5661

57-
_env_path = Path.cwd() / ".env"
58-
if _env_path.exists() and dotenv is not None:
5962
dotenv.load_dotenv(dotenv_path=_env_path)
6063
if OPTERYX_DEBUG:
6164
print(f"{datetime.datetime.now()} [LOADER] Loading `.env` file.")
62-
except ImportError: # pragma: no cover
63-
pass
65+
except ImportError: # pragma: no cover
66+
# dotenv is optional; if it's not installed, just continue.
67+
pass
6468

6569

6670
if OPTERYX_DEBUG: # pragma: no cover
@@ -179,7 +183,7 @@ def query_to_arrow(
179183
visibility_filters: Optional[Dict[str, Any]] = None,
180184
limit: int = None,
181185
**kwargs,
182-
) -> pyarrow.Table:
186+
) -> "pyarrow.Table":
183187
"""
184188
Helper function to execute a query and return a pyarrow Table.
185189

opteryx/__main__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212

1313
import argparse
1414
import os
15-
import readline
1615
import sys
1716
import threading
1817
import time
@@ -25,10 +24,6 @@
2524
from opteryx.utils.sql import clean_statement
2625
from opteryx.utils.sql import remove_comments
2726

28-
29-
if readline:
30-
pass
31-
3227
# Define ANSI color codes
3328
ANSI_RED = "\u001b[31m"
3429
ANSI_RESET = "\u001b[0m"
@@ -209,6 +204,8 @@ def main():
209204

210205

211206
if __name__ == "__main__":
207+
import readline # pragma: no cover
208+
212209
try:
213210
main()
214211
except Exception as e:

opteryx/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 1685
4+
__build__ = 1690
55
__author__ = "@joocer"
6-
__version__ = "0.26.0-beta.1685"
6+
__version__ = "0.26.0-beta.1690"
77

88
# Store the version here so:
99
# 1) we don't load dependencies by storing it in __init__.py

opteryx/compiled/list_ops/list_in_string.pyx

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ numpy.import_array()
3333

3434
from cpython.bytes cimport PyBytes_AsString
3535
from libc.stdint cimport int32_t, uint8_t, uintptr_t
36-
from libc.string cimport memchr
36+
from libc.string cimport memchr, memcpy
3737
import platform
3838

3939

@@ -65,12 +65,18 @@ init_searcher()
6565

6666

6767
cdef inline int fast_memcmp_short(const char *a, const char *b, size_t n):
68-
"""Optimized memcmp for short strings (<= 8 bytes)"""
69-
cdef size_t i
70-
for i in range(n):
71-
if a[i] != b[i]:
72-
return 1
73-
return 0
68+
cdef uint64_t aval = 0, bval = 0
69+
cdef uint64_t mask
70+
71+
if n == 0:
72+
return 0
73+
elif n <= 8:
74+
mask = ((<uint64_t>1) << (8 * n)) - 1
75+
memcpy(&aval, a, n)
76+
memcpy(&bval, b, n)
77+
return (aval & mask) != (bval & mask)
78+
else:
79+
return memcmp(a, b, n) != 0
7480

7581

7682
cdef inline int boyer_moore_horspool(const char *haystack, size_t haystacklen,
@@ -146,7 +152,9 @@ cdef inline int boyer_moore_horspool_with_table(const char *haystack, size_t hay
146152
"""BMH with precomputed table - optimized version"""
147153
cdef size_t i = 0
148154
cdef size_t tail_index
155+
cdef size_t needlelen_sub1 = needlelen - 1
149156
cdef unsigned char last_char
157+
cdef unsigned char tail_char
150158

151159
if needlelen == 0 or haystacklen < needlelen:
152160
return 0
@@ -155,20 +163,24 @@ cdef inline int boyer_moore_horspool_with_table(const char *haystack, size_t hay
155163
if needlelen == 1:
156164
return memchr(haystack, needle[0], haystacklen) != NULL
157165

158-
last_char = <unsigned char>needle[needlelen - 1]
166+
last_char = <unsigned char>needle[needlelen_sub1]
167+
cdef size_t end_index = haystacklen - needlelen
159168

160-
while i <= haystacklen - needlelen:
161-
# Check last character first
162-
if haystack[i + needlelen - 1] == last_char:
163-
if needlelen <= 8:
164-
if fast_memcmp_short(&haystack[i], needle, needlelen) == 0:
165-
return 1
166-
else:
167-
if memcmp(&haystack[i], needle, needlelen) == 0:
168-
return 1
169+
while i <= end_index:
170+
tail_index = i + needlelen_sub1
171+
tail_char = <unsigned char>haystack[tail_index]
169172

170-
tail_index = i + needlelen - 1
171-
i += skip[<unsigned char>haystack[tail_index]]
173+
# Check last character first
174+
if tail_char == last_char:
175+
if haystack[i] == needle[0]:
176+
if needlelen <= 8:
177+
if fast_memcmp_short(&haystack[i], needle, needlelen) == 0:
178+
return 1
179+
else:
180+
if memcmp(&haystack[i], needle, needlelen) == 0:
181+
return 1
182+
183+
i += skip[tail_char]
172184

173185
return 0
174186

opteryx/config.py

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@
1010
from typing import Optional
1111
from typing import Union
1212

13-
import psutil
14-
1513
_config_values: dict = {}
1614

1715
# we need a preliminary version of this variable
@@ -30,8 +28,16 @@ def memory_allocation_calculation(allocation: Union[float, int]) -> int:
3028
Returns:
3129
int: Memory size in bytes to be allocated.
3230
"""
33-
total_memory = psutil.virtual_memory().total # Convert bytes to megabytes
3431

32+
# Import psutil lazily to avoid paying the import cost at module import time.
33+
# Use a small helper so tests or callers that need the value will trigger the
34+
# import only when this function is called.
35+
def _get_total_memory_bytes() -> int:
36+
import psutil
37+
38+
return psutil.virtual_memory().total
39+
40+
total_memory = _get_total_memory_bytes()
3541
if 0 < allocation < 1: # Treat as a percentage
3642
return int(total_memory * allocation)
3743
elif allocation >= 1: # Treat as an absolute value in MB
@@ -44,9 +50,13 @@ def system_gigabytes() -> int:
4450
"""
4551
Get the total system memory in gigabytes.
4652
53+
This imports psutil lazily to avoid paying the cost at module import time.
54+
4755
Returns:
4856
int: Total system memory in gigabytes.
4957
"""
58+
import psutil
59+
5060
return psutil.virtual_memory().total // (1024 * 1024 * 1024)
5161

5262

@@ -163,20 +173,67 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
163173
MAX_CONSECUTIVE_CACHE_FAILURES: int = int(get("MAX_CONSECUTIVE_CACHE_FAILURES", 10))
164174
"""Maximum number of consecutive cache failures before disabling cache usage."""
165175

166-
MAX_LOCAL_BUFFER_CAPACITY: int = memory_allocation_calculation(float(get("MAX_LOCAL_BUFFER_CAPACITY", 0.2)))
167-
"""Local buffer pool size in either bytes or fraction of system memory."""
176+
# These values are computed lazily via __getattr__ to avoid importing
177+
# psutil (and making expensive system calls) during module import.
178+
# Annotate the names so type checkers know about them, but do not assign
179+
# values here — __getattr__ will compute and cache them on first access.
180+
MAX_LOCAL_BUFFER_CAPACITY: int
181+
"""Local buffer pool size in either bytes or fraction of system memory (lazy)."""
168182

169-
MAX_READ_BUFFER_CAPACITY: int = memory_allocation_calculation(float(get("MAX_READ_BUFFER_CAPACITY", 0.1)))
170-
"""Read buffer pool size in either bytes or fraction of system memory."""
183+
MAX_READ_BUFFER_CAPACITY: int
184+
"""Read buffer pool size in either bytes or fraction of system memory (lazy)."""
171185

172186
MAX_STATISTICS_CACHE_ITEMS: int = get("MAX_STATISTICS_CACHE_ITEMS", 10_000)
173187
"""The number of .parquet files we cache the statistics for."""
174188

175-
CONCURRENT_READS: int = int(get("CONCURRENT_READS", max(system_gigabytes(), 2)))
176-
"""Number of read workers per data source."""
189+
_LAZY_VALUES: dict = {}
190+
191+
192+
# Lazily computed configuration values. We compute certain values on first
193+
# access because they depend on expensive system calls (psutil) or other
194+
# runtime properties. Access these as attributes on the module; __getattr__
195+
# will compute and cache them.
196+
197+
CONCURRENT_WORKERS_DEFAULT = int(get("CONCURRENT_WORKERS", 2))
198+
199+
200+
def _compute_MAX_LOCAL_BUFFER_CAPACITY():
201+
return memory_allocation_calculation(float(get("MAX_LOCAL_BUFFER_CAPACITY", 0.2)))
202+
203+
204+
def _compute_MAX_READ_BUFFER_CAPACITY():
205+
return memory_allocation_calculation(float(get("MAX_READ_BUFFER_CAPACITY", 0.1)))
206+
207+
208+
def _compute_CONCURRENT_READS():
209+
# default to max(system_gigabytes(), 2)
210+
return int(get("CONCURRENT_READS", max(system_gigabytes(), 2)))
211+
177212

178-
CONCURRENT_WORKERS: int = int(get("CONCURRENT_WORKERS", 2))
179-
"""Number of worker threads created to execute queries."""
213+
def __getattr__(name: str):
214+
"""Lazy attribute access for computed config values."""
215+
if name == "MAX_LOCAL_BUFFER_CAPACITY":
216+
val = _LAZY_VALUES.get(name)
217+
if val is None:
218+
val = _compute_MAX_LOCAL_BUFFER_CAPACITY()
219+
_LAZY_VALUES[name] = val
220+
return val
221+
if name == "MAX_READ_BUFFER_CAPACITY":
222+
val = _LAZY_VALUES.get(name)
223+
if val is None:
224+
val = _compute_MAX_READ_BUFFER_CAPACITY()
225+
_LAZY_VALUES[name] = val
226+
return val
227+
if name == "CONCURRENT_READS":
228+
val = _LAZY_VALUES.get(name)
229+
if val is None:
230+
val = _compute_CONCURRENT_READS()
231+
_LAZY_VALUES[name] = val
232+
return val
233+
if name == "CONCURRENT_WORKERS":
234+
# simple default, no expensive computation
235+
return CONCURRENT_WORKERS_DEFAULT
236+
raise AttributeError(name)
180237

181238
DATA_CATALOG_PROVIDER: str = get("DATA_CATALOG_PROVIDER")
182239
"""Data Catalog provider."""
@@ -197,6 +254,7 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
197254
# don't output resource (memory) utilization information
198255
ENABLE_RESOURCE_LOGGING: bool = bool(get("ENABLE_RESOURCE_LOGGING", False))
199256
# size of morsels to push between steps
257+
# MORSEL_SIZE remains a plain constant
200258
MORSEL_SIZE: int = int(get("MORSEL_SIZE", 64 * 1024 * 1024))
201259
# not GA
202260
PROFILE_LOCATION:str = get("PROFILE_LOCATION")

opteryx/connectors/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,6 @@ def get_dataset_schema(self, dataset):
8787

8888
import os
8989

90-
import pyarrow
91-
9290
# Lazy imports - connectors are only loaded when actually needed
9391
# This significantly improves module import time from ~500ms to ~130ms
9492

@@ -195,6 +193,8 @@ def register_store(prefix, connector, *, remove_prefix: bool = False, **kwargs):
195193
def register_df(name, frame):
196194
"""register a orso, pandas or Polars dataframe"""
197195
# Lazy import ArrowConnector
196+
import pyarrow
197+
198198
from opteryx.connectors.arrow_connector import ArrowConnector
199199

200200
# polars (maybe others) - the polars to arrow API is a mess

0 commit comments

Comments
 (0)