astronomy-commons · nevencaplar · Feb 4, 2026 · Feb 2, 2026 · Feb 2, 2026 · Feb 2, 2026
diff --git a/src/lsdb/catalog/catalog.py b/src/lsdb/catalog/catalog.py
@@ -147,6 +147,17 @@ def query(self, expr: str) -> Catalog:
         Catalog
             A catalog that contains the data from the original catalog that complies with the query
             expression. If a margin exists, it is filtered according to the same query expression.
+
+        Examples
+        --------
+        Filter a small synthetic catalog using a pandas-style query string:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> filtered = catalog.query("ra < 100 and dec > 0")
+        >>> filtered.compute().head()
         """
         catalog = super().query(expr)
         if self.margin is not None:
@@ -283,6 +294,19 @@ def crossmatch(
             respective suffixes and, whenever specified, a set of extra columns generated by the
             crossmatch algorithm.
 
+        Examples
+        --------
+        Crossmatch two small synthetic catalogs:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> df = nf.compute()[["ra", "dec", "id"]]
+        >>> left = lsdb.from_dataframe(df, catalog_name="left")
+        >>> right = lsdb.from_dataframe(df, catalog_name="right")
+        >>> xmatch = left.crossmatch(right, n_neighbors=1, radius_arcsec=1.0, suffix_method="overlapping_columns")
+        >>> xmatch.compute().head()
+
         Raises
         ------
         TypeError
@@ -773,6 +797,19 @@ def map_partitions(
         Catalog | dd.Series
             A new catalog with each partition replaced with the output of the function applied to the original
             partition. If the function returns a non dataframe output, a dask Series will be returned.
+
+        Examples
+        --------
+        Apply a function to each partition (e.g., add a derived column):
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> def add_flag(df):
+        ...     return df.assign(in_north=df["dec"] > 0)
+        >>> catalog2 = catalog.map_partitions(add_flag)
+        >>> catalog2.compute().head()
         """
         catalog = super().map_partitions(
             func,
@@ -1030,6 +1067,21 @@ def join(
         Catalog
             A new catalog with the columns from each of the input catalogs with their respective suffixes
             added, and the rows merged on the specified columns.
+
+        Examples
+        --------
+        Join two catalogs on a shared key within the same sky partitions:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> base = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> left = base.rename({"ra": "ra_left", "dec": "dec_left"})
+        >>> right = base.rename({"ra": "ra_right", "dec": "dec_right", "id": "id_right"}).map_partitions(
+        ...     lambda df: df.assign(right_flag=True)
+        ... )
+        >>> joined = left.join(right, left_on="id", right_on="id_right", suffix_method="overlapping_columns")
+        >>> joined.compute().head()
         """
         if suffixes is None:
             suffixes = _default_suffixes(self.name, other.name)
@@ -1402,6 +1454,16 @@ def write_catalog(
             If True, raises an error if the catalog is empty.
         **kwargs
             Arguments to pass to the parquet write operations
+
+        Examples
+        --------
+        Write a small synthetic catalog to disk:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]], catalog_name="demo")
+        >>> catalog.write_catalog("path/to/output/demo_catalog", overwrite=True)
         """
         if as_collection:
             self._check_unloaded_columns(default_columns)

diff --git a/src/lsdb/catalog/dataset/healpix_dataset.py b/src/lsdb/catalog/dataset/healpix_dataset.py
@@ -475,6 +475,18 @@ def get_partition(self, order: int, pixel: int) -> nd.NestedFrame:
         ------
         ValueError
             If no data exists for the specified pixel
+
+        Examples
+        --------
+        Get a single HEALPix partition from a small synthetic catalog:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> hp = catalog.get_healpix_pixels()[0]
+        >>> partition = catalog.get_partition(hp.order, hp.pixel)
+        >>> partition.compute().head()
         """
         partition_index = self.get_partition_index(order, pixel)
         return self._ddf.partitions[partition_index]
@@ -740,6 +752,17 @@ def cone_search(self, ra: float, dec: float, radius_arcsec: float, fine: bool =
         Self
             A new Catalog containing the points filtered to those within the cone, and the partitions that
             overlap the cone.
+
+        Examples
+        --------
+        Filter a small synthetic catalog to a cone on the sky:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> cone = catalog.cone_search(ra=150.0, dec=0.0, radius_arcsec=3600)
+        >>> cone.compute().head()
         """
         return self.search(ConeSearch(ra, dec, radius_arcsec, fine))
 
@@ -1130,6 +1153,16 @@ def plot_pixels(self, projection: str = "MOL", **kwargs) -> tuple[Figure, WCSAxe
         Returns
         -------
         tuple[Figure, WCSAxes]
+
+        Examples
+        --------
+        Plot pixel density for a small synthetic catalog:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> fig, ax = catalog.plot_pixels()
         """
         return self.hc_structure.plot_pixels(projection=projection, **kwargs)
 
@@ -1144,6 +1177,16 @@ def plot_coverage(self, **kwargs) -> tuple[Figure, WCSAxes]:
         Returns
         -------
         tuple[Figure, WCSAxes]
+
+        Examples
+        --------
+        Plot coverage for a small synthetic catalog:
+
+        >>> import lsdb
+        >>> from lsdb.nested.datasets import generate_data
+        >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+        >>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
+        >>> fig, ax = catalog.plot_coverage()
         """
         return self.hc_structure.plot_moc(**kwargs)
 

diff --git a/src/lsdb/loaders/dataframe/from_astropy.py b/src/lsdb/loaders/dataframe/from_astropy.py
@@ -51,9 +51,16 @@ def from_astropy(
     partition_rows : int or None, default None
         The desired partition size, in number of rows. Only one of
         `partition_rows` or `partition_bytes` should be specified.
+
+        Note: partitioning is spatial (HEALPix-based). `partition_rows` is a best-effort target,
+        and the resulting number of partitions is limited by `highest_order` and the sky footprint
+        of your data.
     partition_bytes : int or None, default None
         The desired partition size, in bytes. Only one of
         `partition_rows` or `partition_bytes` should be specified.
+
+        Note: as with `partition_rows`, this is a best-effort target for spatial (HEALPix-based)
+        partitioning and is limited by `highest_order`.
     margin_order : int, default -1
         The order at which to generate the margin cache.
     margin_threshold : float or None, default 5

diff --git a/src/lsdb/loaders/dataframe/from_dataframe.py b/src/lsdb/loaders/dataframe/from_dataframe.py
@@ -53,9 +53,17 @@ def from_dataframe(
     partition_rows : int or None, default None
         The desired partition size, in number of rows. Only one of
         `partition_rows` or `partition_bytes` should be specified.
+
+        Note: partitioning is spatial (HEALPix-based). `partition_rows` is a best-effort target,
+        and the resulting number of partitions is limited by `highest_order` and the sky footprint
+        of your data (e.g., if all rows fall into a single HEALPix pixel at `highest_order`, you will
+        still get a single partition).
     partition_bytes : int or None, default None
         The desired partition size, in bytes. Only one of
         `partition_rows` or `partition_bytes` should be specified.
+
+        Note: as with `partition_rows`, this is a best-effort target for spatial (HEALPix-based)
+        partitioning and is limited by `highest_order`.
     margin_order : int, default -1
         The order at which to generate the margin cache.
     margin_threshold : float or None, default 5
@@ -84,6 +92,17 @@ def from_dataframe(
     ------
     ValueError
         If RA/Dec columns are not found or contain NaN values.
+
+    Examples
+    --------
+    Create a small, synthetic sky catalog and load it into LSDB:
+
+    >>> import lsdb
+    >>> from lsdb.nested.datasets import generate_data
+    >>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
+    >>> df = nf.compute()[["ra", "dec", "id"]]
+    >>> catalog = lsdb.from_dataframe(df, catalog_name="toy_catalog")
+    >>> catalog.compute().head()
     """
     # Load the catalog.
     catalog = DataframeCatalogLoader(