apache · sryza · Oct 6, 2025
diff --git a/python/pyspark/pipelines/spark_connect_graph_element_registry.py b/python/pyspark/pipelines/spark_connect_graph_element_registry.py
@@ -45,10 +45,12 @@ def __init__(self, spark: SparkSession, dataflow_graph_id: str) -> None:
 
     def register_dataset(self, dataset: Dataset) -> None:
         if isinstance(dataset, Table):
-            table_properties = dataset.table_properties
-            partition_cols = dataset.partition_cols
-            schema = None  # TODO
-            format = dataset.format
+            table_details = pb2.PipelineCommand.DefineDataset.TableDetails(
+                table_properties=dataset.table_properties,
+                partition_cols=dataset.partition_cols,
+                schema=None,  # TODO
+                format=dataset.format,
+            )
 
             if isinstance(dataset, MaterializedView):
                 dataset_type = pb2.DatasetType.MATERIALIZED_VIEW
@@ -59,29 +61,29 @@ def register_dataset(self, dataset: Dataset) -> None:
                     errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
                     messageParameters={"dataset_type": type(dataset).__name__},
                 )
+
+            inner_command = pb2.PipelineCommand.DefineDataset(
+                dataflow_graph_id=self._dataflow_graph_id,
+                dataset_name=dataset.name,
+                dataset_type=dataset_type,
+                comment=dataset.comment,
+                table_details=table_details,
+                source_code_location=source_code_location_to_proto(dataset.source_code_location),
+            )
         elif isinstance(dataset, TemporaryView):
-            table_properties = None
-            partition_cols = None
-            schema = None
-            format = None
             dataset_type = pb2.DatasetType.TEMPORARY_VIEW
+            inner_command = pb2.PipelineCommand.DefineDataset(
+                dataflow_graph_id=self._dataflow_graph_id,
+                dataset_name=dataset.name,
+                dataset_type=dataset_type,
+                comment=dataset.comment,
+                source_code_location=source_code_location_to_proto(dataset.source_code_location),
+            )
         else:
             raise PySparkTypeError(
                 errorClass="UNSUPPORTED_PIPELINES_DATASET_TYPE",
                 messageParameters={"dataset_type": type(dataset).__name__},
             )
-
-        inner_command = pb2.PipelineCommand.DefineDataset(
-            dataflow_graph_id=self._dataflow_graph_id,
-            dataset_name=dataset.name,
-            dataset_type=dataset_type,
-            comment=dataset.comment,
-            table_properties=table_properties,
-            partition_cols=partition_cols,
-            schema=schema,
-            format=format,
-            source_code_location=source_code_location_to_proto(dataset.source_code_location),
-        )
         command = pb2.Command()
         command.pipeline_command.define_dataset.CopyFrom(inner_command)
         self._client.execute_command(command)
@@ -91,11 +93,15 @@ def register_flow(self, flow: Flow) -> None:
             df = flow.func()
         relation = cast(ConnectDataFrame, df)._plan.plan(self._client)
 
+        relation_flow_details = pb2.PipelineCommand.DefineFlow.WriteRelationFlowDetails(
+            relation=relation,
+        )
+
         inner_command = pb2.PipelineCommand.DefineFlow(
             dataflow_graph_id=self._dataflow_graph_id,
             flow_name=flow.name,
             target_dataset_name=flow.target,
-            relation=relation,
+            relation_flow_details=relation_flow_details,
             sql_conf=flow.spark_conf,
             source_code_location=source_code_location_to_proto(flow.source_code_location),
         )