Skip to content

Commit 345e1af

Browse files
committed
feat: parquet reader
1 parent e8485ac commit 345e1af

File tree

16 files changed

+782
-17
lines changed

16 files changed

+782
-17
lines changed

dev/build_counter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class VersionStatus(Enum):
2929

3030
__major_version__ = 0
3131
__minor_version__ = 6
32-
__revision_version__ = 29
32+
__revision_version__ = 30
3333
__author__ = "@joocer"
3434
__status__ = VersionStatus.RELEASE
3535

opteryx/__version__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 361
4+
__build__ = 364
55
__author__ = "@joocer"
6-
__version__ = "0.6.29"
6+
__version__ = "0.6.30"
77
__lib__ = "opteryx-core"
8-
__build_date__ = "2026-02-26T21:07:51.277997+00:00Z"
8+
__build_date__ = "2026-02-26T23:35:55.839493+00:00Z"
99

1010
# Store the version here so:
1111
# 1) we don't load dependencies by storing it in __init__.py

opteryx/compiled/aggregations/group_state_store.pyx

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,9 @@ from opteryx.compiled.aggregations.aggregate_kernels cimport update_state
2121
from opteryx.draken.vectors.int64_vector cimport Int64Vector
2222
from opteryx.draken.vectors.float64_vector cimport Float64Vector
2323
from opteryx.draken.vectors.integer_vector cimport IntegerVector
24-
from opteryx.draken.vectors.int64_vector cimport from_sequence as int64_from_sequence
25-
from opteryx.draken.vectors.float64_vector cimport from_sequence as float64_from_sequence
2624

2725
from libc.stdint cimport int8_t, int16_t, int32_t, int64_t, uint8_t, uint64_t
2826
from libc.stdlib cimport malloc, free
29-
from libc.math cimport NAN
3027
from libc.stddef cimport size_t
3128
from cython.operator cimport dereference, preincrement
3229
from opteryx.third_party.abseil.containers cimport IdentityHash
@@ -273,7 +270,6 @@ cdef class GroupStateStore:
273270
cdef uint64_t[::1] key_hashes
274271
cdef uint64_t key_hash
275272
cdef uint64_t distinct_value_u64
276-
cdef IntegerVector key_int_vector
277273
cdef IntegerVector value_int_vector
278274
cdef DrakenFixedBuffer* int_value_ptr
279275
cdef uint64_t* _narrow_key_buf

opteryx/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,11 @@ class Features:
177177
disable_predicate_ordering = bool(get("FEATURE_DISABLE_PREDICATE_ORDERING", False))
178178
disable_predicate_pushdown = bool(get("FEATURE_DISABLE_PREDICATE_PUSHDOWN", False))
179179
disable_manifest_pruning = bool(get("FEATURE_DISABLE_MANIFEST_PRUNING", False))
180+
use_parquet_reader = str(get("FEATURE_USE_PARQUET_READER", "1")).lower() in (
181+
"1",
182+
"true",
183+
"yes",
184+
)
180185

181186

182187
features = Features()

opteryx/connectors/io_systems/gcs_filesystem.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import os
1010
import urllib.parse
1111
from typing import List
12+
from typing import Tuple
1213
from typing import Union
1314

1415
from opteryx.exceptions import DatasetReadError
@@ -137,6 +138,41 @@ def get_file_info(self, paths: Union[str, List[str]]):
137138

138139
return infos[0] if single_path else infos
139140

141+
def read_ranges(self, path: str, ranges: List[Tuple[int, int]]) -> List[bytes]:
    """Read multiple byte ranges from a GCS object using HTTP range requests.

    Args:
        path: GCS object path, with or without the ``gs://`` prefix.
        ranges: List of (offset, length) tuples specifying byte ranges to read.

    Returns:
        List of byte buffers in the same order as ranges.

    Raises:
        DatasetReadError: If GCS responds with a non-success status for any range.
    """
    # Normalize path
    if path.startswith("gs://"):
        path = path[5:]

    from opteryx.utils import paths as path_utils

    bucket, _, _, _ = path_utils.get_parts(path)
    object_full_path = urllib.parse.quote(path[(len(bucket) + 1) :], safe="")
    url = f"https://storage.googleapis.com/{bucket}/{object_full_path}"

    result = []
    for offset, length in ranges:
        # GCS range request: Range: bytes=offset-end (inclusive)
        end = offset + length - 1
        response = self.session.get(
            url,
            headers={
                "Authorization": f"Bearer {self.access_token}",
                "Range": f"bytes={offset}-{end}",
            },
            timeout=30,
        )
        # 206 (Partial Content) is the expected reply; 200 means the server
        # ignored the Range header and returned the entire object. Anything
        # else (403, 404, 416, ...) is an error body, not object bytes — do
        # not let it masquerade as data.
        if response.status_code not in (200, 206):
            raise DatasetReadError(
                f"Unable to read bytes {offset}-{end} of '{path}' "
                f"(HTTP {response.status_code})"
            )
        content = response.content
        if response.status_code == 200 and len(content) > length:
            # Full-object reply: slice out just the requested window.
            content = content[offset : offset + length]
        result.append(content)
    return result
175+
140176
def stream_to(self, path: str, sink, chunk_size: int = 1 << 20) -> int:
141177
"""Stream a GCS object directly into *sink* without an intermediate buffer.
142178

opteryx/connectors/io_systems/local_filesystem.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import datetime
99
import os
10+
from typing import List
11+
from typing import Tuple
1012

1113

1214
class MemoryMappedFile:
@@ -214,6 +216,24 @@ def get_file_info(self, paths):
214216

215217
return infos[0] if single_path else infos
216218

219+
def read_ranges(self, path: str, ranges: List[Tuple[int, int]]) -> List[bytes]:
    """Read multiple byte ranges from a local file.

    Args:
        path: Absolute or relative path to the local file.
        ranges: List of (offset, length) tuples specifying byte ranges to read.

    Returns:
        List of byte buffers in the same order as ranges.
    """
    with open(path, "rb") as stream:

        def _read_span(span: Tuple[int, int]) -> bytes:
            # Position at the span's start, then pull exactly `size` bytes.
            start, size = span
            stream.seek(start)
            return stream.read(size)

        # The comprehension is fully materialized before the file closes.
        return [_read_span(span) for span in ranges]
236+
217237
def stream_to(self, path: str, sink, chunk_size: int = 1 << 20) -> int:
218238
"""Stream a local file directly into *sink* without an intermediate buffer.
219239

opteryx/connectors/io_systems/s3_filesystem.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
from dataclasses import dataclass
1111
from typing import List
1212
from typing import Optional
13+
from typing import Tuple
1314
from typing import Union
1415

1516
from minio.select import OutputSerialization
1617
from minio.xml import SubElement
1718

1819
from opteryx.connectors.capabilities import PredicatePushable
20+
from opteryx.exceptions import DatasetReadError
1921
from opteryx.exceptions import MissingDependencyError
2022
from opteryx.exceptions import UnmetRequirementError
2123
from opteryx.third_party.alantsd.base64 import encode
@@ -255,6 +257,37 @@ def get_file_info(self, paths: Union[str, List[str]]):
255257

256258
return infos[0] if single_path else infos
257259

260+
def read_ranges(self, path: str, ranges: List[Tuple[int, int]]) -> List[bytes]:
    """Read multiple byte ranges from an S3 object using HTTP range requests.

    Args:
        path: S3 object path including bucket as first component
            (e.g. ``my-bucket/path/to/file.parquet``).
        ranges: List of (offset, length) tuples specifying byte ranges to read.

    Returns:
        List of byte buffers in the same order as ranges.
    """
    from opteryx.utils import paths as path_utils

    bucket, object_path, name, extension = path_utils.get_parts(path)
    full_object_name = object_path + "/" + name + extension

    result = []
    for offset, length in ranges:
        response = self.minio.get_object(
            bucket_name=bucket,
            object_name=full_object_name,
            offset=offset,
            length=length,
        )
        try:
            result.append(response.read())
        finally:
            # The MinIO client requires BOTH close() and release_conn();
            # close() alone leaves the urllib3 connection checked out of
            # the pool, leaking one connection per range read.
            response.close()
            response.release_conn()
    return result
290+
258291
def stream_to(self, path: str, sink, chunk_size: int = 1 << 20) -> int:
259292
"""Stream an S3 object directly into *sink* without an intermediate buffer.
260293

opteryx/operators/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def execute(self, morsel):
9090
from .aggregate_node import AggregateNode # aggregate data
9191
from .iops_read_node import IopsReadNode
9292
from .null_reader_node import NullReaderNode # empty table for contradictory predicates
93+
from .parquet_read_node import ParquetReadNode
9394
from .simple_aggregate_node import SimpleAggregateNode # aggregate data
9495
from .simple_aggregate_and_group_node import SimpleAggregateAndGroupNode # aggregate data
9596

@@ -136,6 +137,7 @@ def execute(self, morsel):
136137
"AggregateNode",
137138
"IopsReadNode",
138139
"NullReaderNode",
140+
"ParquetReadNode",
139141
"SimpleAggregateNode",
140142
"SimpleAggregateAndGroupNode",
141143
"CrossJoinNode",
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# See the License at http://www.apache.org/licenses/LICENSE-2.0
4+
# Distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
5+
6+
"""
7+
Parquet Read Node
8+
9+
SQL Query Execution Plan Node that reads Parquet files using the column-chunk
10+
range-read design (docs/parquet-column-reads-design.md).
11+
12+
Instead of downloading whole blobs into a shared-memory ring, this node:
13+
14+
1. Fetches the Parquet footer for each file (two small range reads each).
15+
2. Fans out (file × row-group) work units to a thread pool.
16+
3. For each unit, batches all projected column ranges into one read_ranges()
17+
call, decodes with rugo, and yields the assembled row group.
18+
19+
The filesystem layer is taken directly from the connector (every catalog-backed
20+
connector already exposes ``self.filesystem``), so this node works identically
21+
for local disk, GCS, and S3.
22+
23+
Row groups are yielded in completion order — the thread pool handles overlap
24+
between I/O and decode across all files and row groups simultaneously.
25+
"""
26+
27+
from __future__ import annotations
28+
29+
import time
30+
from typing import Generator
31+
32+
from opteryx import EOS
33+
from opteryx.draken.morsels.morsel import Morsel
34+
from opteryx.models import QueryProperties
35+
from opteryx.parquet_io import InMemoryParquetCache
36+
from opteryx.parquet_io import fetch_footer
37+
from opteryx.parquet_io import iter_row_groups
38+
from opteryx.utils.file_decoders import get_decoder
39+
40+
from .read_node import ReaderNode
41+
42+
43+
class ParquetReadNode(ReaderNode):
    """Read node backed by column-chunk range reads via ``parquet_io``.

    Activated for filesystem-backed connectors (GCS, S3, local) when the
    manifest contains only ``.parquet`` files. Falls back to the existing
    ``IopsReadNode`` / ``ReaderNode`` paths for mixed or non-Parquet manifests.
    """

    def __init__(self, properties: QueryProperties, **parameters) -> None:
        ReaderNode.__init__(self, properties=properties, **parameters)
        # Optional pushed-down predicates; currently carried, not applied here.
        self.predicates = parameters.get("predicates")

    @property
    def name(self) -> str:  # pragma: no cover
        return "Parquet Read"

    def to_mermaid(self, nid):  # pragma: no cover
        mermaid = f'NODE_{nid}[("**{self.name.upper()}**<br />'
        mermaid += f"{self.connector.dataset}<br />"
        mermaid += f"({self.execution_time / 1_000_000:,.2f}ms)"
        return mermaid + '")]'

    @staticmethod
    def _empty_table(orso_schema):
        """Build a zero-row Arrow table matching *orso_schema*, with columns
        renamed to their query-engine identities."""
        from orso import DataFrame

        as_arrow = DataFrame(rows=[], schema=orso_schema).arrow()
        renames = [orso_schema.column(col).identity for col in as_arrow.column_names]
        return as_arrow.rename_columns(renames)

    def execute(self, morsel, **kwargs) -> Generator:
        if morsel == EOS:
            yield None
            return

        orso_schema = self.parameters["schema"]

        # ── Empty manifest ────────────────────────────────────────────────────
        if not self.manifest or self.manifest.get_file_count() == 0:
            yield self._empty_table(orso_schema)
            return

        # ── Project schema to requested columns only ──────────────────────────
        requested_identities = {c.schema_column.identity for c in self.columns}
        orso_schema.columns = [
            col for col in orso_schema.columns if col.identity in requested_identities
        ]
        self.readings["columns_read"] += len(orso_schema.columns)

        records_to_read = self.limit if self.limit is not None else float("inf")

        filesystem = self.connector.filesystem
        # Column names as they appear in the Parquet file (Parquet uses the
        # original names, not identity aliases).
        column_names = [col.name for col in orso_schema.columns]
        # Map data-file column name → query-engine identity for Morsel construction.
        name_to_identity = {col.name: col.identity for col in orso_schema.columns}
        blob_paths = self.manifest.get_file_paths()

        # One cache per execute() call: footers shared across all row groups of
        # the same file; column chunks cached for reuse across row groups with
        # identical content (rare but free).
        cache = InMemoryParquetCache()
        result_morsel = None

        decode_start = time.monotonic_ns()
        try:
            for row_group in iter_row_groups(filesystem, blob_paths, column_names, cache):
                # Strip the bookkeeping keys before treating the dict as columns.
                row_group.pop("__path__")
                row_group.pop("__row_group__")

                # Assemble the projected columns into a Draken Morsel directly.
                # Each value is a DrakenVector; we map data-file names to identity
                # names so the morsel arrives downstream already correctly labelled.
                identity_names = [name_to_identity[col] for col in row_group]
                vectors = list(row_group.values())
                result_morsel = Morsel.from_vectors(identity_names, vectors)

                num_rows = result_morsel.num_rows
                self.readings["rows_seen"] += num_rows
                self.readings["blobs_seen"] += 1

                # ── LIMIT enforcement ─────────────────────────────────────────
                if records_to_read < num_rows:
                    result_morsel = result_morsel.slice(0, int(records_to_read))
                    records_to_read = 0
                else:
                    records_to_read -= num_rows

                self.readings["blobs_read"] += 1
                self.telemetry.blobs_read += 1
                self.readings["rows_read"] += result_morsel.num_rows
                self.telemetry.rows_read += result_morsel.num_rows
                self.readings["bytes_processed"] += result_morsel.nbytes
                self.telemetry.bytes_processed += result_morsel.nbytes

                yield result_morsel

                if records_to_read <= 0:
                    break

        finally:
            decode_ns = time.monotonic_ns() - decode_start
            self.readings["time_decoding_blobs"] = (
                self.readings.get("time_decoding_blobs", 0) + decode_ns
            )
            self.telemetry.time_decoding_blobs += decode_ns

        # ── Empty result guard ────────────────────────────────────────────────
        # NOTE: previously this referenced `pyarrow` and `arrow_schema`, neither
        # of which exists in this module — any empty result raised NameError.
        # Reuse the same empty-table construction as the empty-manifest path.
        if result_morsel is None:
            self.readings["empty_datasets"] += 1
            yield self._empty_table(orso_schema)

0 commit comments

Comments
 (0)