Skip to content

Commit 6b56966

Browse files
authored
remove GetChunks Cloud API and all its usage (#11449)
1 parent e007e07 commit 6b56966

File tree

11 files changed

+193
-802
lines changed

11 files changed

+193
-802
lines changed

crates/store/re_datafusion/src/dataframe_query_common.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ use re_dataframe::external::re_chunk_store::ChunkStore;
2323
use re_dataframe::{Index, QueryExpression};
2424
use re_log_encoding::codec::wire::decoder::Decode as _;
2525
use re_log_types::EntryId;
26-
use re_protos::cloud::v1alpha1::DATASET_MANIFEST_ID_FIELD_NAME;
2726
use re_protos::cloud::v1alpha1::ext::{Query, QueryLatestAt, QueryRange};
27+
use re_protos::cloud::v1alpha1::{DATASET_MANIFEST_ID_FIELD_NAME, QueryDatasetResponse};
2828
use re_protos::cloud::v1alpha1::{GetDatasetSchemaRequest, QueryDatasetRequest};
2929
use re_protos::common::v1alpha1::ext::ScanParameters;
3030
use re_protos::headers::RerunHeadersInjectorExt as _;
@@ -106,10 +106,10 @@ impl DataframeQueryTableProvider {
106106
let query = query_from_query_expression(query_expression);
107107

108108
let fields_of_interest = [
109-
"chunk_partition_id",
110-
"chunk_id",
111-
"rerun_partition_layer",
112-
"chunk_key",
109+
QueryDatasetResponse::PARTITION_ID,
110+
QueryDatasetResponse::CHUNK_ID,
111+
QueryDatasetResponse::PARTITION_LAYER,
112+
QueryDatasetResponse::CHUNK_KEY,
113113
]
114114
.into_iter()
115115
.map(String::from)
@@ -346,7 +346,7 @@ fn compute_schema_for_query(
346346
// Create the actual filter to apply to the column descriptors
347347
let filter = ChunkStore::create_component_filter_from_query(query_expression);
348348

349-
// When we call GetChunks we will not return row_id, so we only select indices and
349+
// When we call QueryDataset we will not return row_id, so we only select indices and
350350
// components from the column descriptors.
351351
let filtered_fields = column_descriptors
352352
.filter_components(filter)

crates/store/re_datafusion/src/dataframe_query_provider_wasm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ impl DataframePartitionStream {
9999
chunks_and_partition_ids.map_err(|err| exec_datafusion_err!("{err}"))?;
100100

101101
let _span = tracing::trace_span!(
102-
"get_chunks::batch_insert",
102+
"fetch_chunks::batch_insert",
103103
num_chunks = chunks_and_partition_ids.len()
104104
)
105105
.entered();

crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto

Lines changed: 1 addition & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -110,25 +110,13 @@ service RerunCloudService {
110110
// * Latest-at, range and dataframe queries.
111111
// * Arbitrary Lance filters.
112112
//
113-
// To fetch the actual chunks themselves, see `GetChunks`.
113+
// To fetch the actual chunks themselves, see `FetchChunks`.
114114
//
115115
// Passing chunk IDs to this method effectively acts as an IF_EXIST filter.
116116
//
117117
// This endpoint requires the standard dataset headers.
118118
rpc QueryDataset(QueryDatasetRequest) returns (stream QueryDatasetResponse) {}
119119

120-
// Perform Rerun-native queries on a dataset, returning the underlying chunks.
121-
//
122-
// These Rerun-native queries include:
123-
// * Filtering by specific partition and chunk IDs.
124-
// * Latest-at, range and dataframe queries.
125-
// * Arbitrary Lance filters.
126-
//
127-
// To fetch only the actual chunk IDs rather than the chunks themselves, see `QueryDataset`.
128-
//
129-
// This endpoint requires the standard dataset headers.
130-
rpc GetChunks(GetChunksRequest) returns (stream GetChunksResponse) {}
131-
132120
// Fetch specific chunks from Rerun Cloud. In a 2-step query process, the result of the 1st phase,
133121
// that is, the result of `QueryDataset`, should include all the necessary information to send
134122
// the actual chunk requests, which is the 2nd step of the query process.
@@ -530,63 +518,6 @@ message QueryRange {
530518
reserved "fuzzy_descriptors";
531519
}
532520

533-
message GetChunksRequest {
534-
// Client can specify from which partitions to get chunks. If left unspecified (empty list),
535-
// data from all partition (that match other query parameters) will be included.
536-
repeated rerun.common.v1alpha1.PartitionId partition_ids = 2;
537-
538-
// Client can specify chunk ids to include. If left unspecified (empty list),
539-
// all chunks (that match other query parameters) will be included.
540-
repeated rerun.common.v1alpha1.Tuid chunk_ids = 3;
541-
542-
// Which entity paths are we interested in? Leave empty, and set `select_all_entity_paths`,
543-
// in order to query all of them.
544-
repeated rerun.common.v1alpha1.EntityPath entity_paths = 4;
545-
546-
// If set, the query will cover all existing entity paths.
547-
//
548-
// `entity_paths` must be empty, otherwise an error will be raised.
549-
//
550-
// Truth table:
551-
// ```text
552-
// select_all_entity_paths | entity_paths | result
553-
// ------------------------+----------------+--------
554-
// false | [] | valid query, empty results (no entity paths selected)
555-
// false | ['foo', 'bar'] | valid query, 'foo' & 'bar' selected
556-
// true | [] | valid query, all entity paths selected
557-
// true | ['foo', 'bar'] | invalid query, error
558-
// ```
559-
bool select_all_entity_paths = 6;
560-
561-
// Which components are we interested in?
562-
//
563-
// If left unspecified, all existing components are considered of interest.
564-
//
565-
// This will perform a basic fuzzy match on the available columns' descriptors.
566-
// The fuzzy logic is a simple case-sensitive `contains()` query.
567-
// For example, given a `log_tick__SeriesLines:width` index, all of the following
568-
// would match: `SeriesLines:width`, `Width`, `SeriesLines`, etc.
569-
repeated string fuzzy_descriptors = 9;
570-
571-
// If set, static data will be excluded from the results.
572-
bool exclude_static_data = 7;
573-
574-
// If set, temporal data will be excluded from the results.
575-
bool exclude_temporal_data = 8;
576-
577-
// Query details
578-
Query query = 5;
579-
580-
reserved 1;
581-
reserved "dataset_id";
582-
}
583-
584-
message GetChunksResponse {
585-
// Every gRPC response, even within the confines of a stream, involves HTTP2 overhead, which isn't
586-
// cheap by any means, which is why we're returning a batch of `ArrowMsg` rather than a single one.
587-
repeated rerun.log_msg.v1alpha1.ArrowMsg chunks = 1;
588-
}
589-
590521
message FetchChunksRequest {
591522
// Information about the chunks to fetch. These dataframes have to include the following columns:
592523
// * `chunk_id` - Chunk unique identifier

crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs

Lines changed: 10 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use re_chunk::TimelineName;
1111
use re_log_types::{EntityPath, EntryId, TimeInt};
1212
use re_sorbet::ComponentColumnDescriptor;
1313

14-
use crate::cloud::v1alpha1::{EntryKind, QueryTasksResponse};
14+
use crate::cloud::v1alpha1::{EntryKind, QueryDatasetResponse, QueryTasksResponse};
1515
use crate::cloud::v1alpha1::{
1616
GetDatasetSchemaResponse, RegisterWithDatasetResponse, ScanPartitionTableResponse,
1717
VectorDistanceMetric,
@@ -121,61 +121,18 @@ impl TryFrom<crate::cloud::v1alpha1::QueryDatasetRequest> for QueryDatasetReques
121121
}
122122
}
123123

124-
// --- GetChunksRequest --
124+
// --- QueryDatasetResponse ---
125125

126-
#[derive(Debug, Clone)]
127-
pub struct GetChunksRequest {
128-
pub partition_ids: Vec<crate::common::v1alpha1::ext::PartitionId>,
129-
pub chunk_ids: Vec<re_chunk::ChunkId>,
130-
pub entity_paths: Vec<EntityPath>,
131-
pub select_all_entity_paths: bool,
132-
pub fuzzy_descriptors: Vec<String>,
133-
pub exclude_static_data: bool,
134-
pub exclude_temporal_data: bool,
135-
pub query: Option<Query>,
126+
impl QueryDatasetResponse {
127+
pub const PARTITION_ID: &str = "chunk_partition_id";
128+
pub const CHUNK_ID: &str = "chunk_id";
129+
pub const PARTITION_LAYER: &str = RegisterWithDatasetResponse::PARTITION_LAYER;
130+
pub const CHUNK_KEY: &str = "chunk_key";
131+
pub const CHUNK_IS_STATIC: &str = "chunk_is_static";
132+
pub const CHUNK_ENTITY_PATH: &str = "chunk_entity_path";
136133
}
137134

138-
impl TryFrom<crate::cloud::v1alpha1::GetChunksRequest> for GetChunksRequest {
139-
type Error = tonic::Status;
140-
141-
fn try_from(value: crate::cloud::v1alpha1::GetChunksRequest) -> Result<Self, Self::Error> {
142-
Ok(Self {
143-
partition_ids: value
144-
.partition_ids
145-
.into_iter()
146-
.map(TryInto::try_into)
147-
.collect::<Result<Vec<_>, _>>()?,
148-
149-
chunk_ids: value
150-
.chunk_ids
151-
.into_iter()
152-
.map(|tuid| {
153-
let id: re_tuid::Tuid = tuid.try_into()?;
154-
Ok::<_, tonic::Status>(re_chunk::ChunkId::from_u128(id.as_u128()))
155-
})
156-
.collect::<Result<Vec<_>, _>>()?,
157-
158-
entity_paths: value
159-
.entity_paths
160-
.into_iter()
161-
.map(|path| {
162-
path.try_into().map_err(|err| {
163-
tonic::Status::invalid_argument(format!("invalid entity path: {err}"))
164-
})
165-
})
166-
.collect::<Result<Vec<_>, _>>()?,
167-
168-
select_all_entity_paths: value.select_all_entity_paths,
169-
170-
fuzzy_descriptors: value.fuzzy_descriptors,
171-
172-
exclude_static_data: value.exclude_static_data,
173-
exclude_temporal_data: value.exclude_temporal_data,
174-
175-
query: value.query.map(|q| q.try_into()).transpose()?,
176-
})
177-
}
178-
}
135+
// --- DoMaintenanceRequest ---
179136

180137
#[derive(Debug, Clone)]
181138
pub struct DoMaintenanceRequest {

0 commit comments

Comments
 (0)