Skip to content

Commit e5f079d

Browse files
committed
cython performance
1 parent 675d15c commit e5f079d

14 files changed

+701
-195
lines changed

opteryx/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 1665
4+
__build__ = 1666
55
__author__ = "@joocer"
6-
__version__ = "0.26.0-beta.1665"
6+
__version__ = "0.26.0-beta.1666"
77

88
# Store the version here so:
99
# 1) we don't load dependencies by storing it in __init__.py

opteryx/compiled/structures/jsonl_decoder.pyx

Lines changed: 23 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,6 +113,9 @@ cdef inline void extract_all_values(
113113
cdef int quote_offset
114114
cdef int i
115115
cdef int found_count = 0
116+
cdef int key_matched = 0
117+
cdef const char* dummy_ptr
118+
cdef Py_ssize_t dummy_len
116119

117120
while pos < end and found_count < num_cols:
118121
# Find opening quote for key
@@ -137,6 +140,7 @@ cdef inline void extract_all_values(
137140

138141
# Check if this key matches any of our columns
139142
key_len = quote_offset
143+
key_matched = 0
140144
for i in range(num_cols):
141145
if not found_flags[i] and key_lengths[i] == key_len:
142146
if memcmp(key_start, key_ptrs[i], <size_t>key_len) == 0:
@@ -156,9 +160,27 @@ cdef inline void extract_all_values(
156160
if extract_value(value_start, end - value_start, &value_ptrs[i], &value_lens[i]):
157161
found_flags[i] = 1
158162
found_count += 1
163+
# Advance position past the extracted value
164+
pos = value_start + value_lens[i]
165+
key_matched = 1
159166
break
160167

161-
pos = key_start + key_len + 1
168+
# If key didn't match any requested columns, skip over its value
169+
if not key_matched:
170+
value_start = key_start + key_len + 1 # Skip closing quote of key
171+
172+
# Skip whitespace and colon
173+
while value_start < end and (value_start[0] in (32, 9, 13, 58)):
174+
value_start += 1
175+
176+
if value_start < end:
177+
# Extract (and discard) the value to advance position correctly
178+
if extract_value(value_start, end - value_start, &dummy_ptr, &dummy_len):
179+
pos = value_start + dummy_len
180+
else:
181+
pos = key_start + key_len + 1
182+
else:
183+
pos = key_start + key_len + 1
162184

163185
cdef inline int extract_value(
164186
const char* value_start, Py_ssize_t remaining,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "opteryx"
3-
version = "0.26.0-beta.1665"
3+
version = "0.26.0-beta.1666"
44
description = "Query your data, where it lives"
55
requires-python = '>=3.11'
66
readme = {file = "README.md", content-type = "text/markdown"}

test_simd_delimiter.py

Lines changed: 0 additions & 164 deletions
This file was deleted.

0 commit comments

Comments
 (0)