Skip to content

Commit 71c4992

Browse files
authored
Merge pull request #67 from mabel-dev/0.5.6
0.5.6
2 parents 5cc967d + ccb3fc5 commit 71c4992

File tree

4 files changed

+151
-3
lines changed

4 files changed

+151
-3
lines changed

TODO.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
- restore filter pushdowns
2-
- support "create view"/"drop view"/"update view" (https://github.com/mabel-dev/opteryx-core/issues/17)
2+
- "DROP VIEW" doesn't work (https://github.com/mabel-dev/opteryx-core/issues/17)
33
- support "comment" (https://github.com/mabel-dev/opteryx-core/issues/31)
44
- support redoing the statistics `ANALYZE TABLE opteryx.test.astronauts` (https://github.com/mabel-dev/opteryx-core/issues/57)
55
- 'MAKE T' test to test using the opteryx.tests datasets

opteryx/__version__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
22
# DO NOT EDIT THIS FILE DIRECTLY
33

4-
__build__ = 139
4+
__build__ = 141
55
__author__ = "@joocer"
66
__version__ = "0.5.9"
77
__lib__ = "opteryx-core"
8-
__build_date__ = "2026-01-04T16:13:49.116825+00:00Z"
8+
__build_date__ = "2026-01-04T19:08:13.754656+00:00Z"
99

1010
# Store the version here so:
1111
# 1) we don't load dependencies by storing it in __init__.py

opteryx/cursor.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,66 @@ def execute(
215215
self._description = None
216216
self._executed = True
217217

218+
def plan(
    self,
    operation: str,
    params: Optional[Iterable] = None,
    visibility_filters: Optional[Dict[str, Any]] = None,
) -> dict:
    """
    Produce a planner-only representation of the given SQL without executing it.

    Parameters:
        operation: SQL query string
        params: optional parameters for parameterized queries
        visibility_filters: optional visibility filters, forwarded to the query planner

    Returns:
        A dict describing the physical plan, with three keys:
        - nodes: one entry per plan node holding `nid`, `identity`, `type` and `config`
          (reduced to `nid` and `type` when describing the node raises)
        - edges: list of {source, target, relation} entries
        - exit_points: list of the plan's exit points
    """
    self._ensure_open()

    from opteryx.planner import query_planner

    # Planning time is added to the cursor's telemetry, even though nothing is executed.
    start = time.time_ns()
    physical_plan = query_planner(
        operation=operation,
        parameters=params,
        visibility_filters=visibility_filters,
        connection=self._connection,
        qid=self.id,
        telemetry=self._telemetry,
    )
    self._telemetry.time_planning += time.time_ns() - start

    # build a JSON-friendly representation of each node
    nodes = []
    for nid, node in physical_plan.nodes(data=True):
        try:
            node_entry = {
                "nid": nid,
                "identity": getattr(node, "identity", None),
                # prefer `node_type`, fall back to the node's `name`
                "type": getattr(node, "node_type", getattr(node, "name", None)),
                # prefer the structured `plan_config()` over the `config` attribute
                "config": node.plan_config()
                if hasattr(node, "plan_config")
                else getattr(node, "config", None),
            }
        except Exception:
            # Best effort: a node that cannot describe itself must not fail the whole
            # plan, so it is reported by its Python type only.
            node_entry = {"nid": nid, "type": str(type(node))}
        nodes.append(node_entry)

    edges = [{"source": s, "target": t, "relation": r} for s, t, r in physical_plan.edges()]

    plan_dict = {
        "nodes": nodes,
        "edges": edges,
        "exit_points": list(physical_plan.get_exit_points()),
    }

    return plan_dict
218278
@property
219279
def result_type(self) -> ResultType:
220280
return self._result_type

opteryx/operators/read_node.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ def name(self): # pragma: no cover
197197
"""friendly name for this step"""
198198
return "Read"
199199

200+
def sensors(self):
    """
    Extend the parent's sensor readings with this node's `committed_at` value.

    Returns:
        The object returned by `super().sensors()`, with `self.committed_at`
        stored under the "commited_at" key.
    """
    readings = super().sensors()
    # NOTE(review): the key is spelled "commited_at" (single "t") while the attribute is
    # `committed_at`; the spelling is kept because consumers may rely on it - confirm.
    readings["commited_at"] = self.committed_at
    return readings
204+
200205
@property
201206
def config(self):
202207
"""Additional details for this step"""
@@ -216,6 +221,89 @@ def config(self):
216221
f"{' WITH(' + ','.join(self.parameters.get('hints')) + ')' if self.parameters.get('hints') else ''})"
217222
)
218223

224+
def plan_config(self) -> dict:
    """
    Structured configuration for planning/telemetry purposes.

    Returns a dict containing:
    - connector: the connector's `__type__` attribute (None when absent)
    - relation: the `relation` entry of this node's parameters
    - files: list of {path, rows, bytes}; rows/bytes are None when only paths are known
    - selection_pushdown: `str()` of each pushed-down predicate
    - projection_pushdown: list of {schema_index, identity} per projected column;
      schema_index is None when the identity is not found in the schema
    - summary: {total-files, total-rows, total-bytes}; a total is None when any
      listed file lacks that figure
    """
    config = {
        "connector": getattr(self.connector, "__type__", None),
        "relation": self.parameters.get("relation"),
        "files": [],
        "selection_pushdown": [],
        "projection_pushdown": [],
    }

    # Attributes on the node take precedence over entries in `parameters`.
    manifest = getattr(self, "manifest", None) or self.parameters.get("manifest")
    pruned = getattr(self, "pruned_files", None) or self.parameters.get("pruned_files")

    if manifest is not None:
        # manifest.files contains FileEntry objects
        files = [
            {"path": f.file_path, "rows": f.record_count, "bytes": f.uncompressed_size}
            for f in manifest.files
        ]
        # An empty pruned list is treated the same as "no pruning information",
        # so every manifest file is listed in that case.
        if pruned:
            pruned_set = set(pruned)
            files = [entry for entry in files if entry["path"] in pruned_set]
        config["files"] = files
    elif pruned:
        # Without a manifest only the file paths are known
        config["files"] = [{"path": p, "rows": None, "bytes": None} for p in pruned]

    # Selection pushdown: represent predicates simply
    try:
        config["selection_pushdown"] = [str(p) for p in (self.predicates or [])]
    except Exception:
        # Best effort: predicates that cannot be rendered are omitted rather than
        # failing the whole description.
        config["selection_pushdown"] = []

    # Projection pushdown: schema position and identity of each projected column
    schema_columns = getattr(self.schema, "columns", []) or []
    projection = []
    for column in self.columns or []:
        identity = column.schema_column.identity
        # index of the first schema column with this identity, None when absent
        schema_index = next(
            (idx for idx, sc in enumerate(schema_columns) if sc.identity == identity),
            None,
        )
        projection.append({"schema_index": schema_index, "identity": identity})
    config["projection_pushdown"] = projection

    # Summary: a total is only reported when every file carries the figure.
    # With no files, all() is True and sum() is 0, so the totals are 0.
    files = config["files"]
    rows_known = all(entry["rows"] is not None for entry in files)
    bytes_known = all(entry["bytes"] is not None for entry in files)

    config["summary"] = {
        "total-files": len(files),
        "total-rows": sum(entry["rows"] for entry in files) if rows_known else None,
        "total-bytes": sum(entry["bytes"] for entry in files) if bytes_known else None,
    }

    return config
306+
219307
def execute(self, morsel, **kwargs) -> Generator:
220308
"""Perform this step, time how long is spent doing work"""
221309
if morsel == EOS:

0 commit comments

Comments
 (0)