Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
db3e664
Modify random sample to cover large part of the sky
nevencaplar Feb 2, 2026
d690d59
Add API example for from_dataframe
nevencaplar Feb 2, 2026
15c6fc9
Add clarification about highest order being main limiting factor
nevencaplar Feb 2, 2026
2cf510a
Add query example
nevencaplar Feb 2, 2026
9d9beb7
Add example for map_partitions
nevencaplar Feb 2, 2026
c1159c1
Add example for cone_search
nevencaplar Feb 2, 2026
60d41d5
Add join example
nevencaplar Feb 2, 2026
5686171
Add xmatch example
nevencaplar Feb 2, 2026
c094ff2
Add write_catalog example
nevencaplar Feb 2, 2026
76a5dd2
Add get_partition example
nevencaplar Feb 2, 2026
b46f7d9
Add examples for plotting
nevencaplar Feb 2, 2026
993e738
Remove from_dataframe in tutorials directory.
nevencaplar Feb 2, 2026
cfd37b5
Modify compute.head to .head
nevencaplar Feb 3, 2026
ae5d881
Add output for map_partitions example
nevencaplar Feb 3, 2026
88edae6
Add output for query example
nevencaplar Feb 3, 2026
0398d18
Add output for from_dataframe example
nevencaplar Feb 3, 2026
328ffd6
Add output for coneSearch example
nevencaplar Feb 3, 2026
6d0b3c2
Add output for join example
nevencaplar Feb 3, 2026
fa3f433
Add output for crossmatch example
nevencaplar Feb 3, 2026
63543d8
Add output for get_partition example
nevencaplar Feb 3, 2026
e03450c
Remove whitespace behind _healpix_29
nevencaplar Feb 3, 2026
6d515c1
Remove trailing whitespace in from_dataframe example
nevencaplar Feb 3, 2026
e705ff8
Fix length problem in xmatch example
nevencaplar Feb 3, 2026
c3195b7
Add doctest exceptions
nevencaplar Feb 3, 2026
cc4e6ba
Skip test of output for plotting
nevencaplar Feb 3, 2026
9e57c7b
Reduce number of columns in wide output dfs
nevencaplar Feb 3, 2026
acb6232
Avoid long lines in modified examples
nevencaplar Feb 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions src/lsdb/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ def query(self, expr: str) -> Catalog:
Catalog
A catalog that contains the data from the original catalog that complies with the query
expression. If a margin exists, it is filtered according to the same query expression.

Examples
--------
Filter a small synthetic catalog using a pandas-style query string:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> filtered = catalog.query("ra < 100 and dec > 0")
>>> filtered.compute().head()
"""
catalog = super().query(expr)
if self.margin is not None:
Expand Down Expand Up @@ -283,6 +294,19 @@ def crossmatch(
respective suffixes and, whenever specified, a set of extra columns generated by the
crossmatch algorithm.

Examples
--------
Crossmatch two small synthetic catalogs:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> df = nf.compute()[["ra", "dec", "id"]]
>>> left = lsdb.from_dataframe(df, catalog_name="left")
>>> right = lsdb.from_dataframe(df, catalog_name="right")
>>> xmatch = left.crossmatch(right, n_neighbors=1, radius_arcsec=1.0, suffix_method="overlapping_columns")
>>> xmatch.compute().head()

Raises
------
TypeError
Expand Down Expand Up @@ -773,6 +797,19 @@ def map_partitions(
Catalog | dd.Series
A new catalog with each partition replaced with the output of the function applied to the original
partition. If the function returns a non dataframe output, a dask Series will be returned.

Examples
--------
Apply a function to each partition (e.g., add a derived column):

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> def add_flag(df):
...     return df.assign(in_north=df["dec"] > 0)
>>> catalog2 = catalog.map_partitions(add_flag)
>>> catalog2.compute().head()
"""
catalog = super().map_partitions(
func,
Expand Down Expand Up @@ -1030,6 +1067,21 @@ def join(
Catalog
A new catalog with the columns from each of the input catalogs with their respective suffixes
added, and the rows merged on the specified columns.

Examples
--------
Join two catalogs on a shared key within the same sky partitions:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> base = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> left = base.rename({"ra": "ra_left", "dec": "dec_left"})
>>> right = base.rename({"ra": "ra_right", "dec": "dec_right", "id": "id_right"}).map_partitions(
... lambda df: df.assign(right_flag=True)
... )
>>> joined = left.join(right, left_on="id", right_on="id_right", suffix_method="overlapping_columns")
>>> joined.compute().head()
"""
if suffixes is None:
suffixes = _default_suffixes(self.name, other.name)
Expand Down Expand Up @@ -1402,6 +1454,16 @@ def write_catalog(
If True, raises an error if the catalog is empty.
**kwargs
Arguments to pass to the parquet write operations

Examples
--------
Write a small synthetic catalog to disk:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]], catalog_name="demo")
>>> catalog.write_catalog("path/to/demo_catalog", overwrite=True)  # doctest: +SKIP
"""
if as_collection:
self._check_unloaded_columns(default_columns)
Expand Down
43 changes: 43 additions & 0 deletions src/lsdb/catalog/dataset/healpix_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,18 @@ def get_partition(self, order: int, pixel: int) -> nd.NestedFrame:
------
ValueError
If no data exists for the specified pixel

Examples
--------
Get a single HEALPix partition from a small synthetic catalog:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> hp = catalog.get_healpix_pixels()[0]
>>> partition = catalog.get_partition(hp.order, hp.pixel)
>>> partition.compute().head()
"""
partition_index = self.get_partition_index(order, pixel)
return self._ddf.partitions[partition_index]
Expand Down Expand Up @@ -740,6 +752,17 @@ def cone_search(self, ra: float, dec: float, radius_arcsec: float, fine: bool =
Self
A new Catalog containing the points filtered to those within the cone, and the partitions that
overlap the cone.

Examples
--------
Filter a small synthetic catalog to a cone on the sky:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> cone = catalog.cone_search(ra=150.0, dec=0.0, radius_arcsec=3600)
>>> cone.compute().head()
"""
return self.search(ConeSearch(ra, dec, radius_arcsec, fine))

Expand Down Expand Up @@ -1130,6 +1153,16 @@ def plot_pixels(self, projection: str = "MOL", **kwargs) -> tuple[Figure, WCSAxe
Returns
-------
tuple[Figure, WCSAxes]

Examples
--------
Plot pixel density for a small synthetic catalog:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> fig, ax = catalog.plot_pixels()
"""
return self.hc_structure.plot_pixels(projection=projection, **kwargs)

Expand All @@ -1144,6 +1177,16 @@ def plot_coverage(self, **kwargs) -> tuple[Figure, WCSAxes]:
Returns
-------
tuple[Figure, WCSAxes]

Examples
--------
Plot coverage for a small synthetic catalog:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> catalog = lsdb.from_dataframe(nf.compute()[["ra", "dec", "id"]])
>>> fig, ax = catalog.plot_coverage()
"""
return self.hc_structure.plot_moc(**kwargs)

Expand Down
7 changes: 7 additions & 0 deletions src/lsdb/loaders/dataframe/from_astropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,16 @@ def from_astropy(
partition_rows : int or None, default None
The desired partition size, in number of rows. Only one of
`partition_rows` or `partition_bytes` should be specified.

Note: partitioning is spatial (HEALPix-based). `partition_rows` is a best-effort target,
and the resulting number of partitions is limited by `highest_order` and the sky footprint
of your data.
partition_bytes : int or None, default None
The desired partition size, in bytes. Only one of
`partition_rows` or `partition_bytes` should be specified.

Note: as with `partition_rows`, this is a best-effort target for spatial (HEALPix-based)
partitioning and is limited by `highest_order`.
margin_order : int, default -1
The order at which to generate the margin cache.
margin_threshold : float or None, default 5
Expand Down
19 changes: 19 additions & 0 deletions src/lsdb/loaders/dataframe/from_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,17 @@ def from_dataframe(
partition_rows : int or None, default None
The desired partition size, in number of rows. Only one of
`partition_rows` or `partition_bytes` should be specified.

Note: partitioning is spatial (HEALPix-based). `partition_rows` is a best-effort target,
and the resulting number of partitions is limited by `highest_order` and the sky footprint
of your data (e.g., if all rows fall into a single HEALPix pixel at `highest_order`, you will
still get a single partition).
partition_bytes : int or None, default None
The desired partition size, in bytes. Only one of
`partition_rows` or `partition_bytes` should be specified.

Note: as with `partition_rows`, this is a best-effort target for spatial (HEALPix-based)
partitioning and is limited by `highest_order`.
margin_order : int, default -1
The order at which to generate the margin cache.
margin_threshold : float or None, default 5
Expand Down Expand Up @@ -84,6 +92,17 @@ def from_dataframe(
------
ValueError
If RA/Dec columns are not found or contain NaN values.

Examples
--------
Create a small, synthetic sky catalog and load it into LSDB:

>>> import lsdb
>>> from lsdb.nested.datasets import generate_data
>>> nf = generate_data(1000, 5, seed=0, ra_range=(0.0, 300.0), dec_range=(-50.0, 50.0))
>>> df = nf.compute()[["ra", "dec", "id"]]
>>> catalog = lsdb.from_dataframe(df, catalog_name="toy_catalog")
>>> catalog.compute().head()
"""
# Load the catalog.
catalog = DataframeCatalogLoader(
Expand Down
Loading