Skip to content

Commit 151aae4

Browse files
committed
fix double counting
1 parent 6cb2cbb commit 151aae4

File tree

4 files changed

+7
-13
lines changed

4 files changed

+7
-13
lines changed

opteryx/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 1714
4+
__build__ = 1715
55
__author__ = "@joocer"
6-
__version__ = "0.26.0-beta.1714"
6+
__version__ = "0.26.0-beta.1715"
77

88
# Store the version here so:
99
# 1) we don't load dependencies by storing it in __init__.py

opteryx/connectors/disk_connector.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -237,10 +237,9 @@ def read_dataset(
237237
return
238238

239239
remaining_rows = limit if limit is not None else float("inf")
240-
last_morsel = None
241240

242241
def process_result(num_rows, raw_size, decoded):
243-
nonlocal remaining_rows, last_morsel
242+
nonlocal remaining_rows
244243
if decoded.num_rows > remaining_rows:
245244
decoded = decoded.slice(0, remaining_rows)
246245
remaining_rows -= decoded.num_rows
@@ -249,7 +248,6 @@ def process_result(num_rows, raw_size, decoded):
249248
self.rows_seen += num_rows
250249
self.blobs_seen += 1
251250
self.statistics.bytes_raw += raw_size
252-
last_morsel = decoded
253251
return decoded
254252

255253
max_workers = min(self._max_workers, len(blob_names)) or 1
@@ -340,12 +338,8 @@ def process_result(num_rows, raw_size, decoded):
340338
if remaining_rows <= 0:
341339
break
342340

343-
if last_morsel is not None:
344-
self.statistics.columns_read += last_morsel.num_columns
345-
elif columns:
346-
self.statistics.columns_read += len(columns)
347-
elif self.schema:
348-
self.statistics.columns_read += len(self.schema.columns)
341+
# column-level statistics are recorded by the read node once morsels leave
342+
# the connector; they are not counted here, to avoid double counting
349343

350344
def _read_blob_task(self, blob_name: str, columns, predicates):
351345
decoder = get_decoder(blob_name)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "opteryx"
3-
version = "0.26.0-beta.1714"
3+
version = "0.26.0-beta.1715"
44
description = "Query your data, where it lives"
55
requires-python = '>=3.11'
66
readme = {file = "README.md", content-type = "text/markdown"}

tests/unit/planner/test_projection_pushdown_parquet.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import sys
33
import pytest
44

5-
sys.path.insert(1, os.path.join(sys.path[0], "../.."))
5+
sys.path.insert(1, os.path.join(sys.path[0], "../../.."))
66

77
import opteryx
88

0 commit comments

Comments
 (0)