Commit 0ba6de3

Introduce FetchChunks API (#11054)

# What

Supersedes:
* #10852

Introducing the new ``FetchChunks`` API, as per the approach we aligned on last week:

* the output of the ``Query`` call is a dataframe that we can now pass to the new ``FetchChunks`` call (this can be seen on the Cloud side and in the new integration tests)
* since we're dealing with dataframes, the definition of ``ChunkKey`` has moved to Cloud
1 parent c0abb36 commit 0ba6de3
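
For context, here is a rough client-side sketch of the resulting two-step flow. This is not part of the commit: the module paths and the tonic-generated client type (`RerunCloudServiceClient`) are assumptions inferred from the proto changes below; only `FetchChunksRequest`, `FetchChunksResponse`, and the `chunk_infos` field come from this commit.

```rust
// Hypothetical client-side sketch of the two-step query flow (not part of this
// commit). Module paths and the tonic-generated client type are assumptions.
use re_protos::cloud::v1alpha1::{
    rerun_cloud_service_client::RerunCloudServiceClient, FetchChunksRequest, FetchChunksResponse,
};
use re_protos::common::v1alpha1::DataframePart;

/// Step 2 of the query process: send the chunk-info dataframes obtained from a
/// prior `QueryDataset` call back to Rerun Cloud and collect the streamed chunks.
async fn fetch_chunks(
    client: &mut RerunCloudServiceClient<tonic::transport::Channel>,
    // Must carry the `chunk_id`, `partition_id`, `partition_layer` and `chunk_key` columns.
    chunk_infos: Vec<DataframePart>,
) -> Result<Vec<FetchChunksResponse>, tonic::Status> {
    let mut stream = client
        .fetch_chunks(FetchChunksRequest { chunk_infos })
        .await?
        .into_inner();

    // Each streamed response carries a *batch* of `ArrowMsg` chunks, amortizing
    // per-response HTTP/2 overhead.
    let mut batches = Vec::new();
    while let Some(response) = stream.message().await? {
        batches.push(response);
    }
    Ok(batches)
}
```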

File tree: 4 files changed, +170 −4 lines changed

crates/store/re_protos/proto/rerun/v1alpha1/cloud.proto

Lines changed: 27 additions & 1 deletion
```diff
@@ -74,7 +74,11 @@ service RerunCloudService {
   // Search a previously created index.
   rpc SearchDataset(SearchDatasetRequest) returns (stream SearchDatasetResponse) {}
 
-  // Perform Rerun-native queries on a dataset, returning the matching chunk IDs.
+  // Perform Rerun-native queries on a dataset, returning the matching chunk IDs, as well
+  // as information that can be sent back to Rerun Cloud to fetch the actual chunks as part
+  // of `FetchChunks` request. In this 2-step query process, 1st step is getting information
+  // from the server about the chunks that contain relevant information. 2nd step is fetching
+  // those chunks (the actual data).
   //
   // These Rerun-native queries include:
   // * Filtering by specific partition and chunk IDs.
@@ -96,6 +100,13 @@ service RerunCloudService {
   // To fetch only the actual chunk IDs rather than the chunks themselves, see `QueryDataset`.
   rpc GetChunks(GetChunksRequest) returns (stream GetChunksResponse) {}
 
+  // Fetch specific chunks from Rerun Cloud. In a 2-step query process, result of 1st phase,
+  // that is, the result of `QueryDataset` should include all the necessary information to send
+  // the actual chunk requests, which is the 2nd step of the query process.
+  //
+  // See `FetchChunksRequest` for details on the fields that describe each individual chunk.
+  rpc FetchChunks(FetchChunksRequest) returns (stream FetchChunksResponse) {}
+
   // --- Tables ---
   // TODO(jleibs): This will be replaced / extended by Arrow Flight
 
@@ -533,6 +544,21 @@ message GetChunksResponse {
   repeated rerun.log_msg.v1alpha1.ArrowMsg chunks = 1;
 }
 
+message FetchChunksRequest {
+  // Information about the chunks to fetch. These dataframes have to include the following columns:
+  // * `chunk_id` - Chunk unique identifier
+  // * `partition_id` - partition this chunk belongs to. Currently needed as we pass this metadata back and forth
+  // * `partition_layer` - specific partition layer. Currently needed as we pass this metadata back and forth
+  // * `chunk_key` - chunk location details
+  repeated rerun.common.v1alpha1.DataframePart chunk_infos = 1;
+}
+
+message FetchChunksResponse {
+  // Every gRPC response, even within the confines of a stream, involves HTTP2 overhead, which isn't
+  // cheap by any means, which is why we're returning a batch of `ArrowMsg` rather than a single one.
+  repeated rerun.log_msg.v1alpha1.ArrowMsg chunks = 1;
+}
+
 // --- Table Apis ---
 
 message GetTableSchemaRequest {
```
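
To make the `chunk_infos` contract concrete, here is an illustrative Arrow schema for the rows such a dataframe is expected to carry. Only the column names come from the proto comment above; the datatypes (and the direct use of the `arrow` crate) are assumptions for illustration.

```rust
// Illustrative only: a plausible Arrow schema for `FetchChunksRequest.chunk_infos` rows.
// Column names come from the proto comment; the datatypes are assumed.
use std::sync::Arc;

use arrow::datatypes::{DataType, Field, Schema};

fn chunk_info_schema() -> Arc<Schema> {
    Arc::new(Schema::new(vec![
        Field::new("chunk_id", DataType::Utf8, false), // chunk unique identifier
        Field::new("partition_id", DataType::Utf8, false), // partition this chunk belongs to
        Field::new("partition_layer", DataType::Utf8, false), // specific partition layer
        Field::new("chunk_key", DataType::Utf8, false), // chunk location details
    ]))
}
```
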
crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.ext.rs

Lines changed: 1 addition & 1 deletion
```diff
@@ -1255,7 +1255,7 @@ impl ScanPartitionTableResponse {
 // --- DataSource --
 
 // NOTE: Match the values of the Protobuf definition to keep life simple.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
 pub enum DataSourceKind {
     Rrd = 1,
 }
```
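
The only change here is the extra `Hash` derive on `DataSourceKind`. The commit doesn't spell out the motivation; as a minimal sketch of what the derive enables, the enum can now be used as a key in hashed collections (standalone copy of the enum, purely for illustration):

```rust
use std::collections::HashMap;

// Standalone copy of the enum for illustration; the real one lives in re_protos.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum DataSourceKind {
    Rrd = 1,
}

fn main() {
    // `Hash` (together with `Eq`) is what allows the enum to be used as a map key.
    let mut per_kind_counts: HashMap<DataSourceKind, usize> = HashMap::new();
    *per_kind_counts.entry(DataSourceKind::Rrd).or_default() += 1;
    assert_eq!(per_kind_counts[&DataSourceKind::Rrd], 1);
}
```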

crates/store/re_protos/src/v1alpha1/rerun.cloud.v1alpha1.rs

Lines changed: 131 additions & 2 deletions
Some generated files are not rendered by default.

crates/store/re_server/src/rerun_cloud.rs

Lines changed: 11 additions & 0 deletions
```diff
@@ -162,6 +162,7 @@ macro_rules! decl_stream {
     };
 }
 
+decl_stream!(FetchChunksResponseStream<manifest:FetchChunksResponse>);
 decl_stream!(GetChunksResponseStream<manifest:GetChunksResponse>);
 decl_stream!(QueryDatasetResponseStream<manifest:QueryDatasetResponse>);
 decl_stream!(ScanPartitionTableResponseStream<manifest:ScanPartitionTableResponse>);
@@ -894,6 +895,16 @@ impl RerunCloudService for RerunCloudHandler {
         ))
     }
 
+    type FetchChunksStream = FetchChunksResponseStream;
+
+    async fn fetch_chunks(
+        &self,
+        _request: tonic::Request<re_protos::cloud::v1alpha1::FetchChunksRequest>,
+    ) -> std::result::Result<tonic::Response<Self::FetchChunksStream>, tonic::Status> {
+        // TODO(zehiko) implement fetch_chunks
+        Err(tonic::Status::unimplemented("fetch_chunks not implemented"))
+    }
+
     // --- Table APIs ---
 
     async fn register_table(
```