Skip to content

Commit 5f40798

Browse files
committed
rewrite all load api calls to return RecordBatch
1 parent d330d7f commit 5f40798

File tree

5 files changed

+188
-95
lines changed

5 files changed

+188
-95
lines changed

packages/cubejs-backend-native/src/orchestrator.rs

Lines changed: 74 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@ use crate::node_obj_deserializer::JsValueDeserializer;
22
use crate::transport::MapCubeErrExt;
33
use cubeorchestrator::query_message_parser::QueryResult;
44
use cubeorchestrator::query_result_transform::{
5-
RequestResultData, RequestResultDataMulti, TransformedData,
5+
DBResponsePrimitive, RequestResultData, RequestResultDataMulti, TransformedData,
66
};
77
use cubeorchestrator::transport::{JsRawData, TransformDataRequest};
8+
use cubesql::compile::engine::df::scan::{FieldValue, ValueObject};
89
use cubesql::CubeError;
910
use neon::context::{Context, FunctionContext, ModuleContext};
1011
use neon::handle::Handle;
@@ -15,6 +16,7 @@ use neon::prelude::{
1516
};
1617
use neon::types::buffer::TypedArray;
1718
use serde::Deserialize;
19+
use std::borrow::Cow;
1820
use std::sync::Arc;
1921

2022
pub fn register_module(cx: &mut ModuleContext) -> NeonResult<()> {
@@ -39,6 +41,7 @@ pub fn register_module(cx: &mut ModuleContext) -> NeonResult<()> {
3941
/// Bundles a query result with the request describing how it should be
/// transformed, plus a slot for the transformed form once it has been computed.
pub struct ResultWrapper {
4042
/// Passed to `TransformedData::transform` together with `data`.
transform_data: TransformDataRequest,
4143
/// The query result to transform; held behind an `Arc`.
data: Arc<QueryResult>,
44+
/// Output of `transform_result`. The constructor visible in this diff
/// initializes it to `None`; `transform_result` stores `Some(..)` on success.
transformed_data: Option<TransformedData>,
4245
}
4346

4447
impl ResultWrapper {
@@ -115,14 +118,80 @@ impl ResultWrapper {
115118
Ok(Self {
116119
transform_data: transform_request,
117120
data: query_result,
121+
transformed_data: None,
118122
})
119123
}
120124

121-
pub fn transform_result(&self) -> Result<TransformedData, CubeError> {
122-
let transformed = TransformedData::transform(&self.transform_data, &self.data)
123-
.map_cube_err("Can't prepare transformed data")?;
125+
pub fn transform_result(&mut self) -> Result<(), CubeError> {
126+
self.transformed_data = Some(
127+
TransformedData::transform(&self.transform_data, &self.data)
128+
.map_cube_err("Can't prepare transformed data")?,
129+
);
124130

125-
Ok(transformed)
131+
Ok(())
132+
}
133+
}
134+
135+
/// Exposes the wrapped query result through the `ValueObject` interface.
///
/// Both methods call `transform_result` first when no transformed data has
/// been stored yet, and read from the stored value afterwards.
impl ValueObject for ResultWrapper {
136+
/// Returns `dataset.len()` of the transformed data, for either
/// `TransformedData` variant.
///
/// # Errors
/// Propagates the error from `transform_result` when the transformation fails.
fn len(&mut self) -> Result<usize, CubeError> {
137+
// Transform only when nothing is stored yet; otherwise reuse the stored value.
if self.transformed_data.is_none() {
138+
self.transform_result()?;
139+
}
140+
141+
// Cannot be `None` here: the branch above either stored a value or
// returned early through `?`.
let data = self.transformed_data.as_ref().unwrap();
142+
143+
match data {
144+
TransformedData::Compact {
145+
members: _members,
146+
dataset,
147+
} => Ok(dataset.len()),
148+
TransformedData::Vanilla(dataset) => Ok(dataset.len()),
149+
}
150+
}
151+
152+
/// Reads the value stored under `field_name` in row `index` of the
/// transformed data and converts it to a `FieldValue`.
///
/// A value missing from an existing row is reported as `FieldValue::Null`.
///
/// # Errors
/// - Propagates the error from `transform_result` when the transformation fails.
/// - Returns `CubeError::user` when there is no row at `index`.
/// - For `TransformedData::Compact` only, returns `CubeError::user` when
///   `field_name` is not present in `members`. The `Vanilla` variant yields
///   `FieldValue::Null` for an unknown field instead.
fn get(&mut self, index: usize, field_name: &str) -> Result<FieldValue, CubeError> {
153+
// Transform only when nothing is stored yet; otherwise reuse the stored value.
if self.transformed_data.is_none() {
154+
self.transform_result()?;
155+
}
156+
157+
// Cannot be `None` here: the branch above either stored a value or
// returned early through `?`.
let data = self.transformed_data.as_ref().unwrap();
158+
159+
let value = match data {
160+
// Compact: the position of the field within `members` is used as the
// position of the value within the row.
TransformedData::Compact { members, dataset } => {
161+
let Some(row) = dataset.get(index) else {
162+
return Err(CubeError::user(format!(
163+
"Unexpected response from Cube, can't get {} row",
164+
index
165+
)));
166+
};
167+
168+
// Linear scan of `members`, repeated on every call.
let Some(member_index) = members.iter().position(|m| m == field_name) else {
169+
return Err(CubeError::user(format!(
170+
"Field name '{}' not found in members",
171+
field_name
172+
)));
173+
};
174+
175+
// A row with no value at `member_index` is treated as holding a null.
row.get(member_index).unwrap_or(&DBResponsePrimitive::Null)
176+
}
177+
// Vanilla: each row is looked up directly by field name.
TransformedData::Vanilla(dataset) => {
178+
let Some(row) = dataset.get(index) else {
179+
return Err(CubeError::user(format!(
180+
"Unexpected response from Cube, can't get {} row",
181+
index
182+
)));
183+
};
184+
185+
// An absent field is treated as a null rather than an error.
row.get(field_name).unwrap_or(&DBResponsePrimitive::Null)
186+
}
187+
};
188+
189+
// Map the stored primitive onto the `FieldValue` variants; the string
// case wraps a reference in `Cow::Borrowed`.
Ok(match value {
190+
DBResponsePrimitive::String(s) => FieldValue::String(Cow::Borrowed(s)),
191+
DBResponsePrimitive::Number(n) => FieldValue::Number(*n),
192+
DBResponsePrimitive::Boolean(b) => FieldValue::Bool(*b),
193+
DBResponsePrimitive::Null => FieldValue::Null,
194+
})
126195
}
127196
}
128197

packages/cubejs-backend-native/src/transport.rs

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,9 @@ use crate::{
1212
stream::call_js_with_stream_as_callback,
1313
};
1414
use async_trait::async_trait;
15-
use cubesql::compile::engine::df::scan::{MemberField, SchemaRef};
15+
use cubesql::compile::engine::df::scan::{
16+
convert_transport_response, transform_response, MemberField, RecordBatch, SchemaRef,
17+
};
1618
use cubesql::compile::engine::df::wrapper::SqlQuery;
1719
use cubesql::transport::{
1820
SpanId, SqlGenerator, SqlResponse, TransportLoadRequestQuery, TransportLoadResponse,
@@ -334,9 +336,9 @@ impl TransportService for NodeBridgeTransport {
334336
sql_query: Option<SqlQuery>,
335337
ctx: AuthContextRef,
336338
meta: LoadRequestMeta,
337-
_schema: SchemaRef,
338-
// ) -> Result<Vec<RecordBatch>, CubeError> {
339-
) -> Result<TransportLoadResponse, CubeError> {
339+
schema: SchemaRef,
340+
member_fields: Vec<MemberField>,
341+
) -> Result<Vec<RecordBatch>, CubeError> {
340342
trace!("[transport] Request ->");
341343

342344
let native_auth = ctx
@@ -461,20 +463,23 @@ impl TransportService for NodeBridgeTransport {
461463
}
462464
};
463465

464-
break serde_json::from_value::<TransportLoadResponse>(response)
466+
let response = match serde_json::from_value::<TransportLoadResponse>(response) {
467+
Ok(v) => v,
468+
Err(err) => {
469+
return Err(CubeError::user(err.to_string()));
470+
}
471+
};
472+
473+
break convert_transport_response(response, schema.clone(), member_fields)
465474
.map_err(|err| CubeError::user(err.to_string()));
466475
}
467476
ValueFromJs::ResultWrapper(result_wrappers) => {
468-
let response = TransportLoadResponse {
469-
pivot_query: None,
470-
slow_query: None,
471-
query_type: None,
472-
results: result_wrappers
473-
.into_iter()
474-
.map(|v| v.transform_result().unwrap().into())
475-
.collect(),
476-
};
477-
break Ok(response);
477+
break result_wrappers
478+
.into_iter()
479+
.map(|mut wrapper| {
480+
transform_response(&mut wrapper, schema.clone(), &member_fields)
481+
})
482+
.collect::<Result<Vec<_>, _>>();
478483
}
479484
}
480485
}

rust/cubesql/cubesql/src/compile/engine/df/scan.rs

Lines changed: 71 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
use async_trait::async_trait;
2-
use cubeclient::models::{V1LoadRequestQuery, V1LoadResult, V1LoadResultAnnotation};
2+
use cubeclient::models::{V1LoadRequestQuery, V1LoadResponse};
33
pub use datafusion::{
44
arrow::{
55
array::{
@@ -52,7 +52,7 @@ use datafusion::{
5252
logical_plan::JoinType,
5353
scalar::ScalarValue,
5454
};
55-
use serde_json::{json, Value};
55+
use serde_json::Value;
5656

5757
#[derive(Debug, Clone, Eq, PartialEq)]
5858
pub enum MemberField {
@@ -655,28 +655,22 @@ impl ExecutionPlan for CubeScanExecutionPlan {
655655
)));
656656
}
657657

658-
let mut response = JsonValueObject::new(
659-
load_data(
660-
self.span_id.clone(),
661-
request,
662-
self.auth_context.clone(),
663-
self.transport.clone(),
664-
meta.clone(),
665-
self.schema.clone(),
666-
self.options.clone(),
667-
self.wrapped_sql.clone(),
668-
)
669-
.await?
670-
.data,
671-
);
672-
one_shot_stream.data = Some(
673-
transform_response(
674-
&mut response,
675-
one_shot_stream.schema.clone(),
676-
&one_shot_stream.member_fields,
677-
)
678-
.map_err(|e| DataFusionError::Execution(e.message.to_string()))?,
679-
);
658+
let response = load_data(
659+
self.span_id.clone(),
660+
request,
661+
self.auth_context.clone(),
662+
self.transport.clone(),
663+
meta.clone(),
664+
self.schema.clone(),
665+
self.member_fields.clone(),
666+
self.options.clone(),
667+
self.wrapped_sql.clone(),
668+
)
669+
.await?;
670+
671+
// For now execute method executes only one query at a time, so we
672+
// take the first result
673+
one_shot_stream.data = Some(response.first().unwrap().clone());
680674

681675
Ok(Box::pin(CubeScanStreamRouter::new(
682676
None,
@@ -846,9 +840,10 @@ async fn load_data(
846840
transport: Arc<dyn TransportService>,
847841
meta: LoadRequestMeta,
848842
schema: SchemaRef,
843+
member_fields: Vec<MemberField>,
849844
options: CubeScanOptions,
850845
sql_query: Option<SqlQuery>,
851-
) -> ArrowResult<V1LoadResult> {
846+
) -> ArrowResult<Vec<RecordBatch>> {
852847
let no_members_query = request.measures.as_ref().map(|v| v.len()).unwrap_or(0) == 0
853848
&& request.dimensions.as_ref().map(|v| v.len()).unwrap_or(0) == 0
854849
&& request
@@ -866,22 +861,27 @@ async fn load_data(
866861
data.push(serde_json::Value::Null)
867862
}
868863

869-
V1LoadResult::new(
870-
V1LoadResultAnnotation {
871-
measures: json!(Vec::<serde_json::Value>::new()),
872-
dimensions: json!(Vec::<serde_json::Value>::new()),
873-
segments: json!(Vec::<serde_json::Value>::new()),
874-
time_dimensions: json!(Vec::<serde_json::Value>::new()),
875-
},
876-
data,
877-
)
864+
let mut response = JsonValueObject::new(data);
865+
let rec = transform_response(&mut response, schema.clone(), &member_fields)
866+
.map_err(|e| DataFusionError::Execution(e.message.to_string()))?;
867+
868+
rec
878869
} else {
879870
let result = transport
880-
.load(span_id, request, sql_query, auth_context, meta, schema)
881-
.await;
882-
let mut response = result.map_err(|err| ArrowError::ComputeError(err.to_string()))?;
883-
if let Some(data) = response.results.pop() {
884-
match (options.max_records, data.data.len()) {
871+
.load(
872+
span_id,
873+
request,
874+
sql_query,
875+
auth_context,
876+
meta,
877+
schema,
878+
member_fields,
879+
)
880+
.await
881+
.map_err(|err| ArrowError::ComputeError(err.to_string()))?;
882+
let response = result.first();
883+
if let Some(data) = response.cloned() {
884+
match (options.max_records, data.num_rows()) {
885885
(Some(max_records), len) if len >= max_records => {
886886
return Err(ArrowError::ComputeError(format!("One of the Cube queries exceeded the maximum row limit ({}). JOIN/UNION is not possible as it will produce incorrect results. Try filtering the results more precisely or moving post-processing functions to an outer query.", max_records)));
887887
}
@@ -896,7 +896,7 @@ async fn load_data(
896896
}
897897
};
898898

899-
Ok(result)
899+
Ok(vec![result])
900900
}
901901

902902
fn load_to_stream_sync(one_shot_stream: &mut CubeScanOneShotStream) -> Result<()> {
@@ -906,6 +906,7 @@ fn load_to_stream_sync(one_shot_stream: &mut CubeScanOneShotStream) -> Result<()
906906
let transport = one_shot_stream.transport.clone();
907907
let meta = one_shot_stream.meta.clone();
908908
let schema = one_shot_stream.schema.clone();
909+
let member_fields = one_shot_stream.member_fields.clone();
909910
let options = one_shot_stream.options.clone();
910911
let wrapped_sql = one_shot_stream.wrapped_sql.clone();
911912

@@ -918,22 +919,16 @@ fn load_to_stream_sync(one_shot_stream: &mut CubeScanOneShotStream) -> Result<()
918919
transport,
919920
meta,
920921
schema,
922+
member_fields,
921923
options,
922924
wrapped_sql,
923925
))
924926
})
925927
.join()
926-
.map_err(|_| DataFusionError::Execution(format!("Can't load to stream")))?;
927-
928-
let mut response = JsonValueObject::new(res.unwrap().data);
929-
one_shot_stream.data = Some(
930-
transform_response(
931-
&mut response,
932-
one_shot_stream.schema.clone(),
933-
&one_shot_stream.member_fields,
934-
)
935-
.map_err(|e| DataFusionError::Execution(e.message.to_string()))?,
936-
);
928+
.map_err(|_| DataFusionError::Execution(format!("Can't load to stream")))??;
929+
930+
let response = res.first();
931+
one_shot_stream.data = Some(response.cloned().unwrap());
937932

938933
Ok(())
939934
}
@@ -1339,6 +1334,21 @@ pub fn transform_response<V: ValueObject>(
13391334
Ok(RecordBatch::try_new(schema.clone(), columns)?)
13401335
}
13411336

1337+
/// Converts every entry of `response.results` into a `RecordBatch`.
///
/// Each result's `data` is wrapped in a `JsonValueObject` and passed through
/// `transform_response` with the given `schema` and `member_fields`. The
/// batches are returned in the same order as the results.
///
/// # Errors
/// Returns the first `CubeError` produced by `transform_response`; collecting
/// into a `Result` stops at the first failure.
pub fn convert_transport_response(
1338+
response: V1LoadResponse,
1339+
schema: SchemaRef,
1340+
member_fields: Vec<MemberField>,
1341+
) -> std::result::Result<Vec<RecordBatch>, CubeError> {
1342+
response
1343+
.results
1344+
.into_iter()
1345+
.map(|r| {
1346+
// NOTE(review): `r` is an owned item of `into_iter()`, so this `.clone()`
// looks avoidable by moving `r.data` instead — confirm before changing.
let mut response = JsonValueObject::new(r.data.clone());
1347+
transform_response(&mut response, schema.clone(), &member_fields)
1348+
})
1349+
.collect::<std::result::Result<Vec<RecordBatch>, CubeError>>()
1350+
}
1351+
13421352
#[cfg(test)]
13431353
mod tests {
13441354
use super::*;
@@ -1402,10 +1412,12 @@ mod tests {
14021412
_sql_query: Option<SqlQuery>,
14031413
_ctx: AuthContextRef,
14041414
_meta_fields: LoadRequestMeta,
1405-
_schema: SchemaRef,
1406-
) -> Result<V1LoadResponse, CubeError> {
1415+
schema: SchemaRef,
1416+
member_fields: Vec<MemberField>,
1417+
) -> Result<Vec<RecordBatch>, CubeError> {
14071418
let response = r#"
1408-
{
1419+
{
1420+
"results": [{
14091421
"annotation": {
14101422
"measures": [],
14111423
"dimensions": [],
@@ -1419,17 +1431,13 @@ mod tests {
14191431
{"KibanaSampleDataEcommerce.count": null, "KibanaSampleDataEcommerce.maxPrice": null, "KibanaSampleDataEcommerce.isBool": "true", "KibanaSampleDataEcommerce.orderDate": "9999-12-31 00:00:00.000", "KibanaSampleDataEcommerce.city": "City 4"},
14201432
{"KibanaSampleDataEcommerce.count": null, "KibanaSampleDataEcommerce.maxPrice": null, "KibanaSampleDataEcommerce.isBool": "false", "KibanaSampleDataEcommerce.orderDate": null, "KibanaSampleDataEcommerce.city": null}
14211433
]
1422-
}
1434+
}]
1435+
}
14231436
"#;
14241437

1425-
let result: V1LoadResult = serde_json::from_str(response).unwrap();
1426-
1427-
Ok(V1LoadResponse {
1428-
pivot_query: None,
1429-
slow_query: None,
1430-
query_type: None,
1431-
results: vec![result],
1432-
})
1438+
let result: V1LoadResponse = serde_json::from_str(response).unwrap();
1439+
convert_transport_response(result, schema.clone(), member_fields)
1440+
.map_err(|err| CubeError::user(err.to_string()))
14331441
}
14341442

14351443
async fn load_stream(

0 commit comments

Comments
 (0)