@@ -169,6 +169,44 @@ def to_delayed(self, optimize_graph: bool = True) -> list[Delayed]:
169169 """
170170 return self ._ddf .to_delayed (optimize_graph = optimize_graph )
171171
def to_dask_dataframe(self) -> dd.DataFrame:
    """Expose the dataset's partitioned data as a ``dd.DataFrame``.

    The result is produced by handing the dataset's internal frame
    (``self._ddf``) to the ``dd.DataFrame`` constructor.

    Returns
    -------
    dd.DataFrame
        A Dask DataFrame wrapping the dataset's data.

    Notes
    -----
    Only the outer collection type is a Dask DataFrame. The in-memory
    object backing each partition remains a nested-pandas NestedFrame,
    not a plain pandas DataFrame.

    Examples
    --------
    >>> import lsdb
    >>> import pandas as pd
    >>> catalog = lsdb.from_dataframe(pd.DataFrame({"ra":[0, 10], "dec":[5, 15],
    ... "mag":[21, 22], "mag_err":[.1, .2]}))
    >>> ddf = catalog.to_dask_dataframe()
    >>> ddf # doctest: +NORMALIZE_WHITESPACE
    Dask DataFrame Structure:
    ra dec mag mag_err
    npartitions=1
    1369094286720630784 int64[pyarrow] int64[pyarrow] int64[pyarrow] double[pyarrow]
    1441151880758558720 ... ... ... ...
    Dask Name: nestedframe, 3 expressions
    Expr=Dask NestedFrame Structure:
    ra dec mag mag_err
    npartitions=1
    1369094286720630784 int64[pyarrow] int64[pyarrow] int64[pyarrow] double[pyarrow]
    1441151880758558720 ... ... ... ...
    Dask Name: nestedframe, 3 expressions
    Expr=MapPartitions(NestedFrame)
    """
    # The internal frame is a NestedFrame, i.e. a dd.DataFrame subclass.
    # NOTE(review): the frame itself (not its underlying expression) is
    # passed to the constructor, which is why the example repr shows
    # "Expr=Dask NestedFrame Structure" - confirm this is intended.
    nested_frame = self._ddf
    return dd.DataFrame(nested_frame)
209+
172210 @property
173211 def name (self ):
174212 """The name of the catalog"""
0 commit comments