Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions docs/content/reference/migration/migration-0-28.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,30 @@ rotation and scale.
Naturally, if any update to a transform always changes the same components, this does not cause any changes other than
the simplification of not having to clear out all other components that may ever be set, thus reducing memory bloat both
on send and query!

## Python SDK: "partition" renamed to "segment" in catalog APIs

<!-- TODO(ab): as I roll more API updates, I'll keep that section up-to-date -->

In the `rerun.catalog` module, all APIs using "partition" terminology have been renamed to use "segment" instead.
The old APIs are deprecated and will be removed in a future release.

| Old API | New API |
|---------|---------|
| `DatasetEntry.partition_ids()` | `DatasetEntry.segment_ids()` |
| `DatasetEntry.partition_table()` | `DatasetEntry.segment_table()` |
| `DatasetEntry.partition_url()` | `DatasetEntry.segment_url()` |
| `DatasetEntry.download_partition()` | `DatasetEntry.download_segment()` |
| `DatasetEntry.default_blueprint_partition_id()` | `DatasetEntry.default_blueprint_segment_id()` |
| `DatasetEntry.set_default_blueprint_partition_id()` | `DatasetEntry.set_default_blueprint_segment_id()` |
| `DataframeQueryView.filter_partition_id()` | `DataframeQueryView.filter_segment_id()` |

The DataFusion utility functions in `rerun.utilities.datafusion.functions.url_generation` have also been renamed:

| Old API | New API |
|---------|---------|
| `partition_url()` | `segment_url()` |
| `partition_url_udf()` | `segment_url_udf()` |
| `partition_url_with_timeref_udf()` | `segment_url_with_timeref_udf()` |

The partition table column has also been renamed from `rerun_partition_id` to `rerun_segment_id`.
18 changes: 9 additions & 9 deletions rerun_py/rerun_bindings/rerun_bindings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1305,17 +1305,17 @@ class DatasetEntryInternal:

def blueprint_dataset_id(self) -> EntryId | None: ...
def blueprint_dataset(self) -> DatasetEntryInternal | None: ...
def default_blueprint_partition_id(self) -> str | None: ...
def set_default_blueprint_partition_id(self, partition_id: str | None) -> None: ...
def default_blueprint_segment_id(self) -> str | None: ...
def set_default_blueprint_segment_id(self, segment_id: str | None) -> None: ...

# ---

def partition_ids(self) -> list[str]: ...
def partition_table(self) -> DataFusionTable: ...
def segment_ids(self) -> list[str]: ...
def segment_table(self) -> DataFusionTable: ...
def manifest(self) -> DataFusionTable: ...
def partition_url(
def segment_url(
self,
partition_id: str,
segment_id: str,
timeline: str | None = None,
start: datetime | int | None = None,
end: datetime | int | None = None,
Expand All @@ -1329,7 +1329,7 @@ class DatasetEntryInternal:

# ---

def download_partition(self, partition_id: str) -> Recording: ...
def download_segment(self, segment_id: str) -> Recording: ...
def dataframe_query_view(
self,
*,
Expand Down Expand Up @@ -1431,8 +1431,8 @@ class _IndexValuesLikeInternal:
class DataframeQueryView:
"""View into a remote dataset acting as DataFusion table provider."""

def filter_partition_id(self, partition_id: str, *args: Iterable[str]) -> Self:
"""Filter by one or more partition ids. All partition ids are included if not specified."""
def filter_segment_id(self, segment_id: str, *args: Iterable[str]) -> Self:
"""Filter by one or more segment ids. All segment ids are included if not specified."""

def filter_range_sequence(self, start: int, end: int) -> Self:
"""
Expand Down
92 changes: 65 additions & 27 deletions rerun_py/rerun_sdk/rerun/catalog/_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from abc import ABC
from typing import TYPE_CHECKING, Any, Generic, TypeVar

from typing_extensions import deprecated

from rerun_bindings import DatasetEntryInternal, TableEntryInternal

if TYPE_CHECKING:
Expand Down Expand Up @@ -126,84 +128,115 @@ def blueprint_dataset(self) -> DatasetEntry | None:
ds = self._internal.blueprint_dataset()
return None if ds is None else DatasetEntry(ds)

def default_blueprint_partition_id(self) -> str | None:
"""The default blueprint partition ID for this dataset, if any."""
def default_blueprint_segment_id(self) -> str | None:
"""The default blueprint segment ID for this dataset, if any."""

return self._internal.default_blueprint_partition_id()
return self._internal.default_blueprint_segment_id()

def set_default_blueprint_partition_id(self, partition_id: str | None) -> None:
def set_default_blueprint_segment_id(self, segment_id: str | None) -> None:
"""
Set the default blueprint partition ID for this dataset.
Set the default blueprint segment ID for this dataset.

Pass `None` to clear the bluprint. This fails if the change cannot be made to the remote server.
Pass `None` to clear the blueprint. This fails if the change cannot be made to the remote server.
"""

return self._internal.set_default_blueprint_partition_id(partition_id)
return self._internal.set_default_blueprint_segment_id(segment_id)

@deprecated("Use default_blueprint_segment_id() instead")
def default_blueprint_partition_id(self) -> str | None:
"""The default blueprint partition ID for this dataset, if any."""
return self.default_blueprint_segment_id()

@deprecated("Use set_default_blueprint_segment_id() instead")
def set_default_blueprint_partition_id(self, partition_id: str | None) -> None:
"""Set the default blueprint partition ID for this dataset."""
return self.set_default_blueprint_segment_id(partition_id)

def schema(self) -> Schema:
"""Return the schema of the data contained in the dataset."""

return self._internal.schema()

def segment_ids(self) -> list[str]:
"""Returns a list of segment IDs for the dataset."""

return self._internal.segment_ids()

@deprecated("Use segment_ids() instead")
def partition_ids(self) -> list[str]:
"""Returns a list of partitions IDs for the dataset."""
"""Returns a list of partition IDs for the dataset."""
return self.segment_ids()

def segment_table(self) -> DataFusionTable:
"""Return the segment table as a Datafusion table provider."""

return self._internal.partition_ids()
return self._internal.segment_table()

@deprecated("Use segment_table() instead")
def partition_table(self) -> DataFusionTable:
"""Return the partition table as a Datafusion table provider."""

return self._internal.partition_table()
return self.segment_table()

def manifest(self) -> DataFusionTable:
"""Return the dataset manifest as a Datafusion table provider."""

return self._internal.manifest()

def partition_url( # noqa: PLR0917
def segment_url( # noqa: PLR0917
self,
partition_id: str,
segment_id: str,
timeline: str | None = None,
start: datetime | int | None = None,
end: datetime | int | None = None,
) -> str:
"""
Return the URL for the given partition.
Return the URL for the given segment.

Parameters
----------
partition_id: str
The ID of the partition to get the URL for.
segment_id: str
The ID of the segment to get the URL for.

timeline: str | None
The name of the timeline to display.

start: int | datetime | None
The start time for the partition.
The start time for the segment.
Integer for ticks, or datetime/nanoseconds for timestamps.

end: int | datetime | None
The end time for the partition.
The end time for the segment.
Integer for ticks, or datetime/nanoseconds for timestamps.

Examples
--------
# With ticks
>>> start_tick, end_time = 0, 10
>>> dataset.partition_url("some_id", "log_tick", start_tick, end_time)
>>> dataset.segment_url("some_id", "log_tick", start_tick, end_time)

# With timestamps
>>> start_time, end_time = datetime.now() - timedelta(seconds=4), datetime.now()
>>> dataset.partition_url("some_id", "real_time", start_time, end_time)
>>> dataset.segment_url("some_id", "real_time", start_time, end_time)

Returns
-------
str
The URL for the given partition.
The URL for the given segment.

"""

return self._internal.partition_url(partition_id, timeline, start, end)
return self._internal.segment_url(segment_id, timeline, start, end)

@deprecated("Use segment_url() instead")
def partition_url( # noqa: PLR0917
self,
partition_id: str,
timeline: str | None = None,
start: datetime | int | None = None,
end: datetime | int | None = None,
) -> str:
"""Return the URL for the given partition."""
return self.segment_url(partition_id, timeline, start, end)

def register(self, recording_uri: str, *, recording_layer: str = "base", timeout_secs: int = 60) -> str:
"""
Expand All @@ -225,8 +258,8 @@ def register(self, recording_uri: str, *, recording_layer: str = "base", timeout

Returns
-------
partition_id: str
The partition ID of the registered RRD.
segment_id: str
The segment ID of the registered RRD.

"""

Expand Down Expand Up @@ -281,10 +314,15 @@ def register_prefix(self, recordings_prefix: str, layer_name: str | None = None)

return self._internal.register_prefix(recordings_prefix, layer_name=layer_name)

def download_segment(self, segment_id: str) -> Recording:
"""Download a segment from the dataset."""

return self._internal.download_segment(segment_id)

@deprecated("Use download_segment() instead")
def download_partition(self, partition_id: str) -> Recording:
"""Download a partition from the dataset."""

return self._internal.download_partition(partition_id)
return self.download_segment(partition_id)

def dataframe_query_view(
self,
Expand All @@ -310,7 +348,7 @@ def dataframe_query_view(
monotonically increasing when data is sent from a single process.

If `None` is passed as the index, the view will contain only static columns (among those
specified) and no index columns. It will also contain a single row per partition.
specified) and no index columns. It will also contain a single row per segment.

Parameters
----------
Expand Down
Loading
Loading