Merge pull request #67 from mabel-dev/0.5.6

joocer · web-flow · commit 71c49928888a · 2026-01-04T19:10:09.000Z
0.5.6
diff --git a/TODO.md b/TODO.md
@@ -1,5 +1,5 @@
 - restore filter pushdowns
-- support "create view"/"drop view"/"update view" (https://github.com/mabel-dev/opteryx-core/issues/17)
+- "DROP VIEW" doesn't work (https://github.com/mabel-dev/opteryx-core/issues/17)
 - support "comment" (https://github.com/mabel-dev/opteryx-core/issues/31)
 - support redoing the statistics `ANALYZE TABLE opteryx.test.astronauts` (https://github.com/mabel-dev/opteryx-core/issues/57)
 - 'MAKE T' test to test using the opteryx.tests datasets
diff --git a/opteryx/__version__.py b/opteryx/__version__.py
@@ -1,11 +1,11 @@
 # THIS FILE IS AUTOMATICALLY UPDATED DURING THE BUILD PROCESS
 # DO NOT EDIT THIS FILE DIRECTLY
 
-__build__ = 139
+__build__ = 141
 __author__ = "@joocer"
 __version__ = "0.5.9"
 __lib__ = "opteryx-core"
-__build_date__ = "2026-01-04T16:13:49.116825+00:00Z"
+__build_date__ = "2026-01-04T19:08:13.754656+00:00Z"
 
 # Store the version here so:
 # 1) we don't load dependencies by storing it in __init__.py
diff --git a/opteryx/cursor.py b/opteryx/cursor.py
@@ -215,6 +215,66 @@ def execute(
             self._description = None
         self._executed = True
 
+    def plan(
+        self,
+        operation: str,
+        params: Optional[Iterable] = None,
+        visibility_filters: Optional[Dict[str, Any]] = None,
+    ) -> dict:
+        """
+        Produce a planner-only representation of the given SQL without executing it.
+
+        Parameters:
+            operation: SQL query string
+            params: optional parameters for parameterized queries
+            visibility_filters: optional visibility filters, forwarded to query_planner
+
+        Returns:
+            A dict with "nodes" (one entry per plan node: nid, identity, type, config),
+            "edges" (source, target, relation) and "exit_points". The time spent
+            planning is added to this cursor's telemetry.
+        """
+        self._ensure_open()
+
+        from opteryx.planner import query_planner
+
+        start = time.time_ns()
+        physical_plan = query_planner(
+            operation=operation,
+            parameters=params,
+            visibility_filters=visibility_filters,
+            connection=self._connection,
+            qid=self.id,
+            telemetry=self._telemetry,
+        )
+        self._telemetry.time_planning += time.time_ns() - start  # nanoseconds
+
+        # describe each plan node as a plain dict
+        nodes = []
+        for nid, node in physical_plan.nodes(data=True):
+            try:
+                node_entry = {
+                    "nid": nid,
+                    "identity": getattr(node, "identity", None),
+                    "type": getattr(node, "node_type", getattr(node, "name", None)),
+                    "config": node.plan_config()
+                    if hasattr(node, "plan_config")
+                    else getattr(node, "config", None),
+                }
+            except Exception:  # best-effort: fall back to a minimal entry for this node
+                node_entry = {"nid": nid, "type": str(type(node))}
+            nodes.append(node_entry)
+
+        edges = [{"source": s, "target": t, "relation": r} for s, t, r in physical_plan.edges()]
+
+        plan_dict = {
+            "nodes": nodes,
+            "edges": edges,
+            "exit_points": list(physical_plan.get_exit_points()),
+        }
+
+        return plan_dict
+
     @property
     def result_type(self) -> ResultType:
         return self._result_type
diff --git a/opteryx/operators/read_node.py b/opteryx/operators/read_node.py
@@ -197,6 +197,11 @@ def name(self):  # pragma: no cover
         """friendly name for this step"""
         return "Read"
 
+    def sensors(self):
+        base = super().sensors()
+        base["commited_at"] = self.committed_at
+        return base
+
     @property
     def config(self):
         """Additional details for this step"""
@@ -216,6 +221,89 @@ def config(self):
             f"{' WITH(' + ','.join(self.parameters.get('hints')) + ')' if self.parameters.get('hints') else ''})"
         )
 
+    def plan_config(self) -> dict:
+        """
+        Structured configuration for planning/telemetry purposes.
+
+        Returns a dict containing:
+          - connector / relation: connector `__type__` and the "relation" parameter
+          - files: list of {path, rows, bytes}, narrowed to the pruned files when set
+          - selection_pushdown: predicates rendered with str()
+          - projection_pushdown: list of {schema_index, identity} per projected column
+          - summary: {total-files, total-rows, total-bytes}; a total is None if unknown
+        """
+        config = {
+            "connector": getattr(self.connector, "__type__", None),
+            "relation": self.parameters.get("relation"),
+            "files": [],
+            "selection_pushdown": [],
+            "projection_pushdown": [],
+        }
+
+        # If a manifest is attached, prefer its file entries
+        manifest = getattr(self, "manifest", None) or self.parameters.get("manifest")
+        pruned = getattr(self, "pruned_files", None) or self.parameters.get("pruned_files")
+        if manifest is not None:
+            # manifest.files contains FileEntry objects
+            for f in manifest.files:
+                config["files"].append(
+                    {"path": f.file_path, "rows": f.record_count, "bytes": f.uncompressed_size}
+                )
+            # If pruning reduced files, filter to pruned list
+            if pruned:
+                pruned_set = set(pruned)
+                config["files"] = [ff for ff in config["files"] if ff.get("path") in pruned_set]
+        elif pruned:
+            # We only have file paths
+            for p in pruned:
+                config["files"].append({"path": p, "rows": None, "bytes": None})
+
+        # Selection pushdown: represent predicates simply
+        try:
+            config["selection_pushdown"] = [str(p) for p in (self.predicates or [])]
+        except Exception:  # best-effort: an unprintable predicate yields an empty list
+            config["selection_pushdown"] = []
+
+        # Projection pushdown: provide schema index and identity for each projected column
+        proj = []
+
+        schema_columns = getattr(self.schema, "columns", []) or []
+        for c in self.columns or []:
+            # match the projected column to a schema column by its internal identity
+            identity = c.schema_column.identity
+            schema_index = None  # stays None when no schema column has this identity
+            for idx, sc in enumerate(schema_columns):
+                if sc.identity == identity:
+                    schema_index = idx
+                    break
+            proj.append({"schema_index": schema_index, "identity": identity})
+
+        config["projection_pushdown"] = proj
+
+        # Summary: aggregate totals for files/rows/bytes when available
+        total_files = len(config["files"])
+        # If any file lacks rows (or bytes) info, that total is left as None
+        total_rows = None
+        total_bytes = None
+        if total_files == 0:
+            total_rows = 0
+            total_bytes = 0
+        else:
+            rows_known = all((f.get("rows") is not None for f in config["files"]))
+            bytes_known = all((f.get("bytes") is not None for f in config["files"]))
+            if rows_known:
+                total_rows = sum((f.get("rows", 0) for f in config["files"]))
+            if bytes_known:
+                total_bytes = sum((f.get("bytes", 0) for f in config["files"]))
+
+        config["summary"] = {
+            "total-files": total_files,
+            "total-rows": total_rows,
+            "total-bytes": total_bytes,
+        }
+
+        return config
+
     def execute(self, morsel, **kwargs) -> Generator:
         """Perform this step, time how long is spent doing work"""
         if morsel == EOS: