Commit 2c01a51

chore(cubestore): Upgrade DF: fix create table with location tests
1 parent 9f11549 commit 2c01a51

4 files changed: +200 -155 lines

rust/cubestore/cubestore-sql-tests/src/tests.rs

Lines changed: 1 addition & 1 deletion
@@ -2263,7 +2263,7 @@ async fn create_table_with_url(service: Box<dyn SqlClient>) {
         .exec_query("CREATE SCHEMA IF NOT EXISTS foo")
         .await
         .unwrap();
-    let create_table_sql = format!("CREATE TABLE foo.bikes (`Response ID` int, `Start Date` text, `End Date` text) LOCATION '{}'", url);
+    let create_table_sql = format!("CREATE TABLE foo.bikes (`Response ID` int, `Start Date` text, `End Date` text) WITH (input_format = 'csv') LOCATION '{}'", url);
     let (_, query_result) = tokio::join!(
         service.exec_query(&create_table_sql),
         service.exec_query("SELECT count(*) from foo.bikes")

rust/cubestore/cubestore/src/queryplanner/planning.rs

Lines changed: 23 additions & 0 deletions
@@ -62,6 +62,7 @@ use datafusion::logical_expr::{
     expr, Aggregate, BinaryExpr, Expr, Extension, Filter, Join, Limit, LogicalPlan, Operator,
     Projection, Sort, SortExpr, SubqueryAlias, TableScan, Union, UserDefinedLogicalNode,
 };
+use datafusion::physical_expr::{Distribution, LexRequirement};
 use datafusion::physical_plan::repartition::RepartitionExec;
 use datafusion::physical_planner::{ExtensionPlanner, PhysicalPlanner};
 use serde::{Deserialize as SerdeDeser, Deserializer, Serialize as SerdeSer, Serializer};

@@ -1720,6 +1721,28 @@ impl ExecutionPlan for WorkerExec {
     fn properties(&self) -> &PlanProperties {
         self.input.properties()
     }
+
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition; self.children().len()]
+    }
+
+    fn required_input_ordering(&self) -> Vec<Option<LexRequirement>> {
+        let input_ordering = self.input.required_input_ordering();
+        if !input_ordering.is_empty() {
+            vec![input_ordering[0].clone()]
+        } else {
+            vec![None]
+        }
+    }
+
+    fn maintains_input_order(&self) -> Vec<bool> {
+        let maintains_input_order = self.input.maintains_input_order();
+        if !maintains_input_order.is_empty() {
+            vec![maintains_input_order[0]]
+        } else {
+            vec![false]
+        }
+    }
 }
 
 /// Use this to pick the part of the plan that the worker must execute.
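The two ordering-related methods added to WorkerExec (required_input_ordering and maintains_input_order) follow the same single-child delegation shape: report a requirement for exactly one child, taking the first entry the wrapped input reports and falling back to a conservative default when it reports none. A minimal runnable sketch of that shape, as a standalone helper rather than the commit's code:

// Hypothetical standalone helper illustrating the delegation pattern above;
// not part of the cubestore source.
fn forward_first<T: Clone>(child_reported: &[T], default: T) -> Vec<T> {
    // A wrapper node with exactly one child reports exactly one entry.
    match child_reported.first() {
        Some(first) => vec![first.clone()],
        None => vec![default],
    }
}

fn main() {
    // maintains_input_order: the child reports [true], the wrapper forwards [true].
    assert_eq!(forward_first(&[true], false), vec![true]);
    // The child reports nothing, so the wrapper falls back to the default.
    assert_eq!(forward_first::<bool>(&[], false), vec![false]);
}

required_input_distribution does not forward anything from the input; it pins each child to Distribution::SinglePartition, which asks DataFusion's distribution enforcement to deliver the node's input as a single partition.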

rust/cubestore/cubestore/src/queryplanner/query_executor.rs

Lines changed: 32 additions & 23 deletions
@@ -22,7 +22,11 @@ use crate::util::memory::MemoryHandler;
 use crate::{app_metrics, CubeError};
 use async_trait::async_trait;
 use core::fmt;
-use datafusion::arrow::array::{make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array, Int16Array, Int32Array, Int64Array, MutableArrayData, StringArray, TimestampMicrosecondArray, TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array};
+use datafusion::arrow::array::{
+    make_array, Array, ArrayRef, BinaryArray, BooleanArray, Decimal128Array, Float64Array,
+    Int16Array, Int32Array, Int64Array, MutableArrayData, StringArray, TimestampMicrosecondArray,
+    TimestampNanosecondArray, UInt16Array, UInt32Array, UInt64Array,
+};
 use datafusion::arrow::compute::SortOptions;
 use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit};
 use datafusion::arrow::ipc::reader::StreamReader;

@@ -43,9 +47,11 @@ use datafusion::execution::{SessionStateBuilder, TaskContext};
 use datafusion::logical_expr::{Expr, LogicalPlan};
 use datafusion::physical_expr;
 use datafusion::physical_expr::{
-    expressions, EquivalenceProperties, LexRequirement, PhysicalSortExpr, PhysicalSortRequirement,
+    expressions, Distribution, EquivalenceProperties, LexRequirement, PhysicalSortExpr,
+    PhysicalSortRequirement,
 };
 use datafusion::physical_optimizer::optimizer::PhysicalOptimizer;
+use datafusion::physical_optimizer::PhysicalOptimizerRule;
 use datafusion::physical_plan::empty::EmptyExec;
 use datafusion::physical_plan::memory::MemoryExec;
 use datafusion::physical_plan::projection::ProjectionExec;

@@ -607,15 +613,13 @@ impl CubeTable {
             .get(remote_path.as_str())
             .expect(format!("Missing remote path {}", remote_path).as_str());
 
-            let file_scan = FileScanConfig::new(
-                ObjectStoreUrl::local_filesystem(),
-                index_schema.clone(),
-            )
-            .with_file(PartitionedFile::from_path(local_path.to_string())?)
-            .with_projection(index_projection_or_none_on_schema_match.clone())
-            .with_output_ordering(vec![(0..key_len)
-                .map(|i| -> Result<_, DataFusionError> {
-                    Ok(PhysicalSortExpr::new(
+            let file_scan =
+                FileScanConfig::new(ObjectStoreUrl::local_filesystem(), index_schema.clone())
+                    .with_file(PartitionedFile::from_path(local_path.to_string())?)
+                    .with_projection(index_projection_or_none_on_schema_match.clone())
+                    .with_output_ordering(vec![(0..key_len)
+                        .map(|i| -> Result<_, DataFusionError> {
+                            Ok(PhysicalSortExpr::new(
                         Arc::new(
                             datafusion::physical_expr::expressions::Column::new_with_schema(
                                 index_schema.field(i).name(),

@@ -624,8 +628,8 @@ impl CubeTable {
                             ),
                             SortOptions::default(),
                         ))
-            })
-            .collect::<Result<Vec<_>, _>>()?]);
+                        })
+                        .collect::<Result<Vec<_>, _>>()?]);
             let parquet_exec = ParquetExecBuilder::new(file_scan)
                 .with_parquet_file_reader_factory(self.parquet_metadata_cache.clone())
                 .build();
@@ -982,7 +986,7 @@ impl ExecutionPlan for CubeTableExec {
                 sort_order = None
             }
         }
-        vec![sort_order.map(|order| {
+        let order = sort_order.map(|order| {
             order
                 .into_iter()
                 .map(|col_index| {

@@ -999,7 +1003,9 @@ impl ExecutionPlan for CubeTableExec {
                     ))
                 })
                 .collect()
-        })]
+        });
+
+        (0..self.children().len()).map(|_| order.clone()).collect()
     }
 
     // TODO upgrade DF

@@ -1070,6 +1076,10 @@ impl ExecutionPlan for CubeTableExec {
     fn maintains_input_order(&self) -> Vec<bool> {
         vec![true; self.children().len()]
     }
+
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition; self.children().len()]
+    }
 }
 
 pub fn lex_ordering_for_index(
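The required_input_ordering change above also fixes the shape of the returned vector: the ordering is computed once and then repeated for each child, matching the one-entry-per-child contract of the trait, alongside the per-child SinglePartition distribution requirement added in the same impl. A small sketch of that replication step, using a hypothetical helper rather than the commit's code:

// Hypothetical helper mirroring the `(0..self.children().len()).map(|_| order.clone()).collect()`
// pattern from CubeTableExec::required_input_ordering; not part of the cubestore source.
fn replicate_per_child<T: Clone>(requirement: Option<T>, num_children: usize) -> Vec<Option<T>> {
    (0..num_children).map(|_| requirement.clone()).collect()
}

fn main() {
    // Three children share the same optional ordering requirement.
    let reqs = replicate_per_child(Some("key ASC"), 3);
    assert_eq!(reqs.len(), 3);
    assert!(reqs.iter().all(|r| *r == Some("key ASC")));
}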
@@ -1540,6 +1550,10 @@ impl ExecutionPlan for ClusterSendExec {
             vec![false]
         }
     }
+
+    fn required_input_distribution(&self) -> Vec<Distribution> {
+        vec![Distribution::SinglePartition; self.children().len()]
+    }
 }
 
 impl fmt::Debug for ClusterSendExec {

@@ -1704,14 +1718,9 @@ pub fn batches_to_dataframe(batches: Vec<RecordBatch>) -> Result<DataFrame, CubeError>
                 }
             }
             // TODO upgrade DF
-            DataType::Decimal128(_, _) => convert_array!(
-                array,
-                num_rows,
-                rows,
-                Decimal128Array,
-                Decimal,
-                (Decimal)
-            ),
+            DataType::Decimal128(_, _) => {
+                convert_array!(array, num_rows, rows, Decimal128Array, Decimal, (Decimal))
+            }
             // DataType::Int64Decimal(1) => convert_array!(
             // array,
             // num_rows,
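The Decimal128 arm above is only reformatted, but it is a reminder that decimal columns arrive as Arrow Decimal128Array values: raw i128 integers interpreted through the column's precision and scale. A minimal, self-contained sketch of reading such an array with the arrow crate re-exported by DataFusion (illustrative only; it does not use the convert_array! macro):

use datafusion::arrow::array::{Array, Decimal128Array};
use datafusion::arrow::error::ArrowError;

fn main() -> Result<(), ArrowError> {
    // 1.25 and 3.50 stored as unscaled i128 values with precision 5, scale 2.
    let decimals = Decimal128Array::from(vec![Some(125_i128), Some(350_i128), None])
        .with_precision_and_scale(5, 2)?;
    for i in 0..decimals.len() {
        if decimals.is_null(i) {
            println!("row {i}: NULL");
        } else {
            // value() returns the unscaled i128; value_as_string() applies the scale.
            println!("row {i}: raw = {}, display = {}", decimals.value(i), decimals.value_as_string(i));
        }
    }
    Ok(())
}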
